Skip to content
Newer
Older
100644 111 lines (84 sloc) 3.41 KB
8218c44 @neilkod initial, super-hacky
neilkod authored
#!/usr/bin/python
"""Build a Gephi GDF graph of who voted for whose sessions, then print stats.

Inputs (pipe-delimited text, one record per line):
    data/sessions.dat   session_id|"session name"|creator_id
    data/users.dat      voter_id|"voter name"
    data/votes.dat      session_id|voter_id

Output:
    oow.gdf   nodes are people (voters and session creators); each edge goes
              from a voter to a session creator and carries the number of
              votes that voter cast for that creator's sessions.

Sessions themselves are not nodes, so session ids never appear in the GDF
file. Voter ids and session-creator ids share one node-id space: a creator
first gets a placeholder entry, and a record in users.dat with the same id
replaces it.
"""
from collections import defaultdict
import operator

# Minimum number of votes from one voter to one creator for the pair to be
# listed in the "voted for someone at least N times" section of the report.
VOTES_LIMIT = 5


def strip_quotes(text):
    """Return the text between the first pair of double quotes in *text*.

    Session names are exported wrapped in double quotes. A value that
    contains no quote at all is returned unchanged instead of raising.
    """
    parts = text.split('"')
    return parts[1] if len(parts) > 1 else text


def data_lines(lines):
    """Yield each line with surrounding whitespace removed, skipping blanks."""
    for line in lines:
        line = line.strip()
        if line:
            yield line


def parse_sessions(lines, person):
    """Parse session records and register their creators in *person*.

    lines  -- iterable of 'session_id|"name"|creator_id' strings
    person -- dict of node id -> {'name', 'type'}; updated in place with a
              placeholder entry for every creator id not already present

    Returns a dict of session_id -> {'name', 'created_by', 'votes'} with
    'votes' initialised to 0. Raises ValueError on a malformed record.
    """
    sessions = {}
    for line in data_lines(lines):
        # Split the ids off both ends so a '|' inside the session name
        # does not break the record.
        session_id, rest = line.split('|', 1)
        session_name, created_by = rest.rsplit('|', 1)
        session_id = int(session_id)
        created_by = int(created_by)

        sessions[session_id] = {
            'name': strip_quotes(session_name),
            'created_by': created_by,
            'votes': 0,
        }

        # Test the keys (ids), not the values: the placeholder is created
        # once per creator, named after the first session seen for them.
        if created_by not in person:
            person[created_by] = {
                "name": "session_creator-%s" % session_id,
                "type": "session creator",
            }
    return sessions


def parse_users(lines, person):
    """Parse 'voter_id|"name"' records into *person*, updating it in place.

    An existing entry with the same id (for example a session-creator
    placeholder) is replaced. Raises ValueError on a malformed record.
    """
    for line in data_lines(lines):
        voter_id, voter_name = line.split('|')
        person[int(voter_id)] = {
            "name": voter_name.replace('"', ''),
            "type": "voter",
        }


def tally_votes(lines, sessions):
    """Count votes from 'session_id|voter_id' records.

    Increments sessions[session_id]['votes'] in place and returns a pair
    (votes, votes_by_person):
        votes           -- (voter_id, creator_id) -> number of votes
        votes_by_person -- voter_id -> total number of votes cast

    Raises KeyError if a vote refers to a session id not in *sessions*.
    """
    votes = defaultdict(int)
    votes_by_person = defaultdict(int)
    for line in data_lines(lines):
        session_id, voter_id = line.split('|')
        voter_id = int(voter_id)
        session = sessions[int(session_id)]

        votes[(voter_id, session['created_by'])] += 1
        session['votes'] += 1
        votes_by_person[voter_id] += 1
    return votes, votes_by_person


def gdf_lines(person, votes):
    """Return the GDF file content as a list of newline-terminated lines.

    The node section lists every entry of *person*; the edge section lists
    every (voter, creator) pair of *votes* with its vote count. In each edge
    the source is the voter and the target is the session creator.
    """
    lines = ['nodedef> name VARCHAR, label VARCHAR, type VARCHAR\n']
    for person_id, vals in person.items():
        lines.append('%s,%s,%s\n' % (person_id, vals['name'], vals['type']))

    lines.append('edgedef> source VARCHAR, target VARCHAR, vote_cnt INT\n')
    for (voter_id, creator_id), vote_cnt in votes.items():
        lines.append("%s,%s,%s\n" % (voter_id, creator_id, vote_cnt))
    return lines


def build_report(person, sessions, votes, votes_by_person,
                 votes_limit=VOTES_LIMIT):
    """Return the text report as a list of lines (without newlines).

    Sections, each in ascending order of vote count:
      * voter/creator pairs with at least *votes_limit* votes
      * the ten voters who cast the most votes
      * the fifteen sessions that received the most votes

    Raises KeyError if a voter or creator id is missing from *person*.
    """
    report = ["people who have voted for someone at least %d times"
              % votes_limit]
    by_count = sorted(votes.items(), key=operator.itemgetter(1))
    for (voter_id, creator_id), vote_cnt in by_count:
        if vote_cnt >= votes_limit:
            report.append("%s voted for %s %d times" % (
                person[voter_id]['name'],
                person[creator_id]['name'],
                vote_cnt))

    report.append("")
    report.append("top ten voters")
    report.append("=== === ======")
    top_voters = sorted(votes_by_person.items(),
                        key=operator.itemgetter(1))[-10:]
    for voter_id, vote_cnt in top_voters:
        report.append("%s %s" % (person[voter_id]['name'], vote_cnt))

    report.append("")
    report.append("top sessions by number of votes")
    report.append("=== ======== == ====== == =====")
    # Sorting (votes, session_id) tuples breaks ties by session id.
    top_sessions = sorted(
        (session['votes'], session_id)
        for session_id, session in sessions.items())[-15:]
    for vote_cnt, session_id in top_sessions:
        session = sessions[session_id]
        report.append("%s %s %s" % (
            vote_cnt,
            session['name'],
            person[session['created_by']]['name']))
    return report


def main():
    """Read the three data files, write oow.gdf and print the report."""
    person = {}

    # Sessions first: creators get placeholder nodes, which the users file
    # may then replace with real names.
    with open('data/sessions.dat') as f:
        sessions = parse_sessions(f, person)
    with open('data/users.dat') as f:
        parse_users(f, person)
    with open('data/votes.dat') as f:
        votes, votes_by_person = tally_votes(f, sessions)

    with open('oow.gdf', 'w') as out_file:
        out_file.writelines(gdf_lines(person, votes))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    for line in build_report(person, sessions, votes, votes_by_person):
        print(line)


if __name__ == '__main__':
    main()
Something went wrong with that request. Please try again.