-
Notifications
You must be signed in to change notification settings - Fork 3
/
mdnversion
89 lines (84 loc) · 2.71 KB
/
mdnversion
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
import os
import json
import re
def decodeLine(line):
    """Return *line* with a small set of HTML character entities decoded.

    Handles ``&quot;``, ``&lt;``, ``&gt;``, ``&nbsp;`` (to a plain space),
    ``&mdash;`` and ``&amp;``.  All other text is returned unchanged.

    The previous version replaced each character with itself and so never
    changed its input; the entity spellings are restored here.
    """
    # '&amp;' is decoded last so that an escaped entity such as '&amp;lt;'
    # becomes the literal text '&lt;' rather than being decoded twice.
    replacements = (
        ('&quot;', '"'),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&nbsp;', ' '),
        ('&mdash;', '—'),
        ('&amp;', '&'),
    )
    for entity, char in replacements:
        line = line.replace(entity, char)
    return line
# Read the corrections file in full, then parse its JSON content.
# `diction` is later indexed by word; each value is split on ',' by the
# scan loop, so values are expected to be comma-separated strings.
with open('fixes2.txt') as f:
    data = f.read()
diction = json.loads(data)
class isScanFlags:
    """Plain container of integer flags.

    ``mainflag`` and ``textflag`` are set to 1 on construction.  The
    remaining flags do not exist on an instance until
    ``newArticleReset()`` has been called, which sets each of them to 0.
    """

    # Names of the flags zeroed by newArticleReset(), in assignment order.
    _ARTICLE_FLAGS = (
        'templateflag',
        'externalflag',
        'refflag',
        'fileflag',
        'quoteflag',
        'quote2flag',
        'quote3flag',
        'commentflag',
        'divflag',
        'codeflag',
        'poemflag',
        'galleryflag',
        'Galleryflag',
        'sourceflag',
    )

    def __init__(self):
        self.mainflag = self.textflag = 1

    def newArticleReset(self):
        """Set every per-article flag to 0; mainflag/textflag are untouched."""
        for flag_name in self._ARTICLE_FLAGS:
            setattr(self, flag_name, 0)
# Markers and flags defined by the script; the scan below does not read them.
mainstr = '<ns>0</ns>'
endpage = '</page>'
mainflag = 1
textflag = 1

# A word found in this list stops the scan of the current line.  The list is
# empty and nothing below adds to it.
typos = []
# Every candidate word examined so far, across all files.  A set keeps the
# "already seen" test constant-time instead of scanning a growing list.
checkedWords = set()
# Heading text written before each report entry.  It is never reassigned, so
# only the leading newline is actually emitted.
title = ''
flags = isScanFlags()
rootdir = '/Users/mbpmbp/Documents/GitHub/content'

# Splits a line on runs of whitespace together with any adjacent punctuation.
# Compiled once here rather than on every line.
word_splitter = re.compile(
    r"([\-{}\[,.\";\^~#\=&\)\|<>\?]*\s+[\-{},.\";\^~#\=&\)\|>\?]*)"
)

# Walk every Markdown file under rootdir and report, for each line, the first
# not-yet-seen word that has an entry in `diction`.  Each report entry has the
# form "<word>-><!--<first suggested fix>--><path of the file>".
# The `with` block guarantees the report is closed even if the scan fails.
with open('demofile2.txt', 'w') as f:
    for subdir, dirs, files in os.walk(rootdir):
        for filename in files:
            if filename == '_wikihistory.json':
                continue
            # Filter on the file name's suffix before opening anything; a
            # substring test on the full path would also match unrelated
            # paths that merely contain ".md".
            if not filename.endswith('.md'):
                continue
            # Per-article flag reset is currently disabled:
            # flags.newArticleReset()
            path = os.path.join(subdir, filename)
            with open(path) as textfile:
                for line in textfile:
                    # The trailing space makes the final word of the line
                    # end at a separator like every other word.
                    line = decodeLine(line).rstrip('\n') + ' '
                    for word in word_splitter.split(line):
                        # Candidates are purely alphabetic, all lower-case
                        # and at least five characters long.
                        if not word.isalpha():
                            continue
                        if len(word) < 5 or not word.islower():
                            continue
                        if word in checkedWords:
                            continue
                        checkedWords.add(word)
                        if word in typos:
                            break
                        if word in diction:
                            f.write('\n' + title.replace(' <title>', '== [[').replace('</title>', ']] =='))
                            f.write(word + '-><!--')
                            print(word)
                            # Only the first of the comma-separated
                            # suggestions is reported.
                            fix = diction[word].split(",")[0]
                            f.write(fix + '-->' + path)
                            # Flush per hit so partial results are on disk
                            # while a long scan is still running.
                            f.flush()
                            # At most one report entry per line.
                            break