Permalink
Newer
Older
100755 152 lines (125 sloc) 4.45 KB
Jan 21, 2016 @azubiaga first version
1 <?php
2 ini_set("memory_limit", "2048M");
3
4 function get_replying_ids ($tweetid, $username) {
5 global $replyingids;
6
7 $maxposition = "";
8
9 do {
10 if ($maxposition == "") {
11 $url = "https://twitter.com/" . $username . "/status/" . $tweetid;
12 }
13 else {
14 $url = "https://twitter.com/i/" . $username . "/conversation/" . $tweetid . "?include_available_features=1&include_entities=1&max_position=" . $maxposition;
15 }
16 $content = shell_exec("wget \"" . $url . "\" -q --load-cookies=./cookies.txt -O -");
17 $content = html_entity_decode(str_replace("\\n", "\n", $content));
18 $content = str_replace("\\u003c", "<", $content);
19 $content = str_replace("\\u003e", ">", $content);
20 $content = str_replace("\\/", "/", $content);
21 $content = str_replace("\\\"", "\"", $content);
22
23 if (preg_match_all("|<a href=\"(/[^/]*/status/[0-9]*)\" class=\"tweet-timestamp js-permalink js-nav js-tooltip\"|U", $content, $reptweets)) {
24 foreach ($reptweets[1] as $key => $reptweet) {
25 $reptweettokens = explode("/", $reptweet);
26 $repusername = $reptweettokens[1];
27 $reptweetid = $reptweettokens[count($reptweettokens) - 1];
28
29 if (!in_array($reptweetid, $replyingids)) {
30 array_push($replyingids, $reptweetid);
31 get_replying_ids($reptweetid, $repusername);
32 }
33 }
34 }
35
36 $maxposition = "";
37 if (preg_match("|data-min-position=\"([^\"]*)\"|U", $content, $mp) || preg_match("|\"min_position\":\"([^\"]*)\"|U", $content, $mp)) {
38 $maxposition = $mp[1];
39 }
40 } while ($maxposition != "");
41 }
42
43 function add_to_structure ($tweetid, $inreplyto) {
44 global $structure;
45
46 foreach ($structure as $id => $substructure) {
47 if ($id == $inreplyto) {
48 $structure[$id] = $tweetid;
49 }
50 else {
51 add_to_structure($tweetid, $inreplyto, $structure[$id]);
52 }
53 }
54 }
55
56 function collect_replying_tweets ($tweetid, $username) {
57 global $argv, $replyingids;
58 $replycount = 0;
59
60 @mkdir("data/" . $tweetid . "/reactions/");
61 @chmod("data/" . $tweetid . "/reactions/", 0777);
62 get_replying_ids($tweetid, $username);
63
64 $idsstr = "";
65 $idcount = 0;
66 $allcount = 0;
67 foreach ($replyingids as $replyingid) {
68 $allcount++;
69 $idsstr .= $replyingid . ",";
70 $idcount++;
71 if ($idcount == 100 || $allcount == count($replyingids)) {
72 $tweets = @shell_exec("python retrieve.tweet.list.py " . substr($idsstr, 0, strlen($idsstr) - 1));
73 $tweets = explode("\n", $tweets);
74 foreach ($tweets as $tweet) {
75 $tweetobj = @json_decode($tweet);
76 if (isset($tweetobj->id_str)) {
77 file_put_contents("data/" . $tweetid . "/reactions/" . $tweetobj->id_str . ".json", $tweet);
78 $replycount++;
79 }
80 }
81
82 $idsstr = "";
83 $idcount = 0;
84 }
85 }
86
87 if (isset($argv[1])) {
88 echo $tweetid . " - source tweet and " . $replycount . " replies collected.\n";
89 }
90 }
91
92 function create_structure($tweetid) {
93 global $structure;
94
95 $parents = array();
96 $dir = dir("data/" . $tweetid . "/reactions/");
97 while (($file = $dir->read()) !== false) {
98 if ($file != "." && $file != "..") {
99 $tweet = json_decode(file_get_contents("data/" . $tweetid . "/reactions/" . $file));
100
101 $inreplyto = $tweet->in_reply_to_status_id_str;
102 $id = $tweet->id;
103
104 if (!isset($parents[$inreplyto])) {
105 $parents[$inreplyto] = array();
106 }
107 array_push($parents[$inreplyto], $id);
108 }
109 }
110
111 foreach ($structure as $sid => $substructure) {
112 if (isset($parents[$sid])) {
113 foreach ($parents[$sid] as $cid) {
114 $structure[$sid][$cid] = array();
115 }
116 }
117 }
118
119 file_put_contents("data/" . $tweetid . "/structure.json", json_encode($structure));
120 chmod("data/" . $tweetid . "/structure.json", 0777);
121 }
122
123 if (!isset($argv[1])) {
124 exit(0);
125 }
126 $tweetid = $argv[1];
127
128 if (strstr($tweetid, "/")) {
129 $tweetid = explode("/", $tweetid);
130 $tweetid = $tweetid[count($tweetid) - 1];
131 }
132
133 $replyingids = array();
134 $structure = array($tweetid => array());
135
136 $sourcetweet = @shell_exec("python retrieve.tweet.py " . $tweetid);
137 $sourcetweetobj = json_decode($sourcetweet);
138 if (isset($sourcetweetobj->id_str)) {
139 $username = $sourcetweetobj->user->screen_name;
140
141 @mkdir("data/" . $tweetid);
142 @chmod("data/" . $tweetid, 0766);
143 @mkdir("data/" . $tweetid . "/source-tweets/");
144 @chmod("data/" . $tweetid . "/source-tweets/", 0766);
145 file_put_contents("data/" . $tweetid . "/source-tweets/" . $tweetid . ".json", $sourcetweet);
146
147 collect_replying_tweets($tweetid, $username);
148
149 create_structure($tweetid);
150 }
151 ?>