# batch_script.py
# Forked from KevinCham1993/Muthu-s-api.
import os
import sys
import json
import sqlite3
import shlex, subprocess
# Debug switch: set to 1 to print the progress messages guarded by
# `if debug:` in this script, or to 0 to silence them.
debug = 1
def populate_sqlite_db():
    """Run the ``batch_script.py`` located in the current working directory.

    The caller must already have changed into the ``WriteJsonDataToSQLite``
    directory (the script entry point does this before calling).

    Per the original notes, that inner script transfers the JSON files into
    the SQLite database, adds the tables forum, post2, post3, comment2 and
    comment3, and converts the database into canonical format. Its steps
    were previously listed as writeToSQLite.py, fromOriginalToCanonical.py
    and transferFromTables.py.

    The exit status of the child process is not inspected.
    """
    # The former `cd WriteJsonDataToSQLite` sub-shell call was dropped: a
    # `cd` executed in its own shell ends with that shell and never changes
    # the directory used by the next command.
    #
    # sys.executable replaces the Windows-only `py` launcher so the same
    # interpreter that runs this script also runs the inner one, on any OS.
    subprocess.run([sys.executable, "batch_script.py"])
def populate_intervened_posts(*, course="eQJvsjn9EeWJaxK5AT4frw", dbname="coursera"):
    """Run ``make_noinstructor_corpus.pl -density`` for one course.

    Per the original notes, this extracts the posts and comments of threads
    that an instructor or TA intervened in at least once, populating the
    tables post2 and comment2.

    Args:
        course: Course id passed to the script's ``-course`` option.
        dbname: Database name passed to the script's ``-dbname`` option.

    Raises:
        FileNotFoundError: If ``perl`` cannot be found on PATH.

    The script is looked up in the current working directory and its exit
    status is not inspected.
    """
    # An argument list (no shell) keeps the parameter values from being
    # re-interpreted by a command interpreter.
    subprocess.run([
        "perl", "make_noinstructor_corpus.pl",
        "-dbname", dbname,
        "-course", course,
        "-density",
    ])
def update_docid(*, course="eQJvsjn9EeWJaxK5AT4frw", dbname="coursera"):
    """Run ``updatedocid.pl`` for one course.

    Per the original notes, this creates document ids, one per thread, which
    is useful when threads from all courses share one database and is a
    required field for feature extraction.

    Args:
        course: Course id passed to the script's ``-course`` option.
        dbname: Database name passed to the script's ``-dbname`` option.

    Raises:
        FileNotFoundError: If ``perl`` cannot be found on PATH.

    The script is looked up in the current working directory and its exit
    status is not inspected.
    """
    # An argument list (no shell) keeps the parameter values from being
    # re-interpreted by a command interpreter.
    subprocess.run([
        "perl", "updatedocid.pl",
        "-dbname", dbname,
        "-course", course,
    ])
def compute_term_weights(*, course="eQJvsjn9EeWJaxK5AT4frw", dbname="coursera"):
    """Run ``compute_term_weights.pl -uni -tf`` for both thread types.

    The script is invoked twice, first with ``-thread inst`` and then with
    ``-thread noinst``. Per the original notes, these runs collect term
    frequencies and populate termfreqc14inst and termfreqc14noinst.

    Args:
        course: Course id passed to the script's ``-course`` option.
        dbname: Database name passed to the script's ``-dbname`` option.

    Raises:
        FileNotFoundError: If ``perl`` cannot be found on PATH.

    The script is looked up in the current working directory and its exit
    status is not inspected.
    """
    # Same command for both thread types; only the -thread value differs.
    for thread_type in ("inst", "noinst"):
        subprocess.run([
            "perl", "compute_term_weights.pl",
            "-dbname", dbname,
            "-course", course,
            "-uni", "-tf",
            "-thread", thread_type,
        ])
def generate_feature(*, course="eQJvsjn9EeWJaxK5AT4frw", dbname="coursera"):
    """Run ``gen_features.pl`` with the ``-uni`` and ``-allf`` flags.

    Presumably this writes the feature file later consumed by the
    classifier step; that is inferred from the script name, not verified.

    Args:
        course: Course id passed to the script's ``-course`` option.
        dbname: Database name passed to the script's ``-dbname`` option.

    Raises:
        FileNotFoundError: If ``perl`` cannot be found on PATH.

    The script is looked up in the current working directory and its exit
    status is not inspected.
    """
    # An argument list (no shell) keeps the parameter values from being
    # re-interpreted by a command interpreter.
    subprocess.run([
        "perl", "gen_features.pl",
        "-course", course,
        "-dbname", dbname,
        "-uni", "-allf",
    ])
def classifier_model():
    """Run ``predict_thread_intervention.pl`` through the shell.

    The command uses the fixed database name, course id, ``-w nve`` setting,
    input feature file and model file spelled out below. The exit status of
    the command is not inspected.
    """
    # Assembled piece by piece for readability; the resulting command line
    # is exactly the single string the shell receives.
    command = (
        "perl predict_thread_intervention.pl"
        " -dbname coursera"
        " -course eQJvsjn9EeWJaxK5AT4frw"
        " -w nve"
        " -in uni+forum+affir+tprop+nums+nont_course+course+_eQJvsjn9EeWJaxK5AT4frw.txt"
        " -model uni+forum+affir+tlen+nums+nont_course_.model_0"
    )
    subprocess.run(command, shell=True)
if __name__ == '__main__':
    # Script entry point: convert the JSON data to SQLite, run the perl
    # processing pipeline on the resulting database, then hand the database
    # to the dashboard front-end folder.

    # Local import: only the entry point needs shutil.
    import shutil

    courseids = []
    if debug:
        print("batch script starts running...")

    # All locations are derived from the directory containing this script.
    # os.path.join is used so the separators are right on every OS; the
    # previous hand-written "\..." literals relied on invalid escape
    # sequences and only worked on Windows.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(dir_path, "lib4moocdata", "coursera", "data")
    bin_dir = os.path.join(dir_path, "lib4moocdata", "coursera", "bin")
    dashboard_dir = os.path.join(dir_path, os.pardir, "dashboard_coursera-master")

    os.chdir(os.path.join(dir_path, "WriteJsonDataToSQLite"))
    print("#Current path is" + os.getcwd())

    # input all course ids
    # NOTE: the pipeline steps below are called without arguments, so this
    # list is only reported here, not passed on to them.
    courseids.append("eQJvsjn9EeWJaxK5AT4frw")
    if debug:
        print("****** course ids:")
        print(courseids)

    populate_sqlite_db()

    # Copy the freshly populated database into the data folder. shutil.copy
    # replaces the Windows-only `copy` shell command and raises if the copy
    # fails instead of letting the pipeline continue without a database.
    shutil.copy("coursera.db", data_dir)

    # The perl scripts are resolved relative to the bin directory.
    os.chdir(bin_dir)
    print("#Current path is" + os.getcwd())
    populate_intervened_posts()
    update_docid()
    compute_term_weights()
    generate_feature()
    classifier_model()

    # Copy resultant database to front-end folder
    os.chdir(data_dir)
    print("#Current path is" + os.getcwd())
    shutil.copy("coursera.db", dashboard_dir)
    print("copy coursera.db " + dashboard_dir)

    if debug:
        print("batch script completed")