-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyPilotly.py
62 lines (38 loc) · 1.41 KB
/
pyPilotly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
'''
Created on Aug 26, 2015
@author: Tony
'''
import os
import csv
import json
import glob
import time
import nltk
import pandas
from sklearn.feature_extraction.text import TfidfVectorizer
# Wall-clock start time; the elapsed run time is printed at the end.
sTime = time.time()

# Load every CSV under data/ into a DataFrame, keyed by the file's base name
# with the extension removed (e.g. data/ShowData.csv -> 'ShowData').
CsvList = glob.glob(os.path.join('data', '*.csv'))
DataMap = {}
for inputFiles in CsvList:
    inputNameNoExtension, inputNameExtension = os.path.splitext(inputFiles)
    # The first row is the header; a cell containing only '.' is read as NaN.
    DataFrameObject = pandas.read_csv(inputFiles, header=0, na_values='.')
    DataMap[os.path.basename(inputNameNoExtension)] = DataFrameObject

# These four inputs are required: if any file is absent from data/, the
# corresponding lookup raises KeyError.
dfUserProfiles = DataMap['UserProfiles_Basic_081515']
dfPanelResponses = DataMap['panel_responses']
dfShowData = DataMap['ShowData']
dfEpisodeData = DataMap['EpisodeData']

# Join user profiles to panel responses on the shared 'user_id' column
# (pandas.merge defaults to an inner join).
dfProfilesResponses = pandas.merge(dfUserProfiles, dfPanelResponses, on='user_id')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dfProfilesResponses.columns.values)
# NOTE: the output lands in the same directory that is globbed above, so a
# later run will also load ProfilesResponses.csv into DataMap.
dfProfilesResponses.to_csv(os.path.join('data', 'ProfilesResponses.csv'))

# NOTE: despite its name, this frame joins only the show and episode data
# (on 'video_id'); the profile/response frame above is not part of it.
dfProfilesResponsesShowEpisode = pandas.merge(dfShowData, dfEpisodeData, on='video_id')
print(dfProfilesResponsesShowEpisode.columns.values)

print("Duration: %s" % (time.time() - sTime))
# Disabled sketch (Python 2 style): convert a CSV file to line-delimited JSON,
# writing one JSON object per row.
# csvfile = open(inputNameNoExtension + inputNameExtension, 'rb')
# jsonfile = open(inputNameNoExtension + '.json', 'wb')
#
# reader = csv.reader(csvfile)
# headers = reader.next()
# reader = csv.DictReader( csvfile, headers )
# for row in reader:
#     json.dump(row, jsonfile)
#     jsonfile.write('\n')