Jan 21, 2016
first version
|
|
|
1 |
<?php |
|
2 |
// Allow this run to use up to 2048 MB of memory.
ini_set('memory_limit', '2048M');
|
3 |
|
|
4 |
/**
 * Recursively collects the ids of the tweets found in the conversation of a tweet.
 *
 * The tweet's permalink page is fetched with wget (using ./cookies.txt); further
 * pages are fetched from the conversation endpoint as long as the response
 * advertises a "min position" cursor. Every tweet permalink found is split into
 * user name and tweet id; each id not yet present in the global $replyingids is
 * appended to it and then scraped in turn.
 *
 * @param string $tweetid  Id of the tweet whose conversation page is scraped.
 * @param string $username Screen name used to build the tweet URL.
 * @return void Results are accumulated in the global $replyingids.
 */
function get_replying_ids ($tweetid, $username) {
    global $replyingids;

    $maxposition = "";
    // Cursors already requested, so a server that keeps returning the same
    // cursor cannot trap us in an endless loop.
    $seenpositions = array();

    do {
        if ($maxposition == "") {
            $url = "https://twitter.com/" . $username . "/status/" . $tweetid;
        }
        else {
            $url = "https://twitter.com/i/" . $username . "/conversation/" . $tweetid . "?include_available_features=1&include_entities=1&max_position=" . $maxposition;
        }

        // The URL contains scraped data; quote it as a single shell argument.
        $content = shell_exec("wget " . escapeshellarg($url) . " -q --load-cookies=./cookies.txt -O -");
        if (!is_string($content) || $content === "") {
            // Download failed or returned nothing: there is nothing to parse
            // and no cursor to follow.
            break;
        }

        // Undo the JSON escaping used by the conversation endpoint so that the
        // same regular expressions work on both HTML and JSON responses.
        $content = html_entity_decode(str_replace("\\n", "\n", $content));
        $content = str_replace("\\u003c", "<", $content);
        $content = str_replace("\\u003e", ">", $content);
        $content = str_replace("\\/", "/", $content);
        $content = str_replace("\\\"", "\"", $content);

        if (preg_match_all("|<a href=\"(/[^/]*/status/[0-9]*)\" class=\"tweet-timestamp js-permalink js-nav js-tooltip\"|U", $content, $reptweets)) {
            foreach ($reptweets[1] as $reptweet) {
                // $reptweet looks like "/<user>/status/<id>".
                $reptweettokens = explode("/", $reptweet);
                $repusername = $reptweettokens[1];
                $reptweetid = $reptweettokens[count($reptweettokens) - 1];

                // The pattern also matches a permalink without digits; skip it.
                if ($reptweetid === "") {
                    continue;
                }

                if (!in_array($reptweetid, $replyingids, true)) {
                    array_push($replyingids, $reptweetid);
                    get_replying_ids($reptweetid, $repusername);
                }
            }
        }

        // Look for the cursor of the next page (HTML attribute or JSON field).
        $maxposition = "";
        if (preg_match("|data-min-position=\"([^\"]*)\"|U", $content, $mp) || preg_match("|\"min_position\":\"([^\"]*)\"|U", $content, $mp)) {
            $maxposition = $mp[1];
        }

        if ($maxposition != "") {
            if (isset($seenpositions[$maxposition])) {
                break;
            }
            $seenpositions[$maxposition] = true;
        }
    } while ($maxposition != "");
}
|
42 |
|
|
43 |
/**
 * Inserts a tweet as a child of the tweet it replies to in the reply tree.
 *
 * The tree is a nested array keyed by tweet id, each value being the array of
 * that tweet's replies. The tree is searched depth-first for $inreplyto and
 * $tweetid is added below the first match as an empty subtree.
 *
 * @param string     $tweetid   Id of the tweet to insert.
 * @param string     $inreplyto Id of the tweet it replies to.
 * @param array|null $subtree   Tree to search (modified in place). When the
 *                              argument is omitted the global $structure is used.
 * @return bool True when the parent was found and the tweet inserted.
 */
function add_to_structure ($tweetid, $inreplyto, &$subtree = null) {
    global $structure;

    // Two-argument call: start the search at the root of the global tree.
    if (func_num_args() < 3) {
        return add_to_structure($tweetid, $inreplyto, $structure);
    }

    if (!is_array($subtree)) {
        return false;
    }

    foreach ($subtree as $id => $children) {
        // Numeric string keys may have been converted to integers by PHP, so
        // compare the textual form of both ids.
        if ((string) $id === (string) $inreplyto) {
            if (!is_array($subtree[$id])) {
                $subtree[$id] = array();
            }
            $subtree[$id][$tweetid] = array();
            return true;
        }

        // Descend into this node's own replies (by reference, so the insertion
        // lands in the caller's tree).
        if (add_to_structure($tweetid, $inreplyto, $subtree[$id])) {
            return true;
        }
    }

    return false;
}
|
55 |
|
|
56 |
/**
 * Collects the conversation of a tweet and stores each retrieved tweet as
 * data/<tweetid>/reactions/<id_str>.json.
 *
 * The ids are gathered by get_replying_ids() into the global $replyingids and
 * then handed to retrieve.tweet.list.py in comma-separated batches of at most
 * 100 ids. Every output line that decodes to an object with an id_str is saved.
 *
 * @param string $tweetid  Id of the source tweet.
 * @param string $username Screen name passed on to get_replying_ids().
 * @return void
 */
function collect_replying_tweets ($tweetid, $username) {
    global $argv, $replyingids;

    $reactionsdir = "data/" . $tweetid . "/reactions/";
    @mkdir($reactionsdir);
    @chmod($reactionsdir, 0777);

    get_replying_ids($tweetid, $username);

    $replycount = 0;
    foreach (array_chunk($replyingids, 100) as $batch) {
        $output = @shell_exec("python retrieve.tweet.list.py " . implode(",", $batch));

        foreach (explode("\n", $output) as $line) {
            $decoded = @json_decode($line);
            if (!isset($decoded->id_str)) {
                // Blank line or anything that is not a tweet object.
                continue;
            }

            file_put_contents($reactionsdir . $decoded->id_str . ".json", $line);
            $replycount++;
        }
    }

    if (isset($argv[1])) {
        echo $tweetid . " - source tweet and " . $replycount . " replies collected.\n";
    }
}
|
91 |
|
|
92 |
/**
 * Builds the nested reply tree below one tweet.
 *
 * @param string|int $id      Id of the tweet whose replies are wanted.
 * @param array      $parents Map of parent id => list of ids replying to it.
 * @param array      $visited Ids already placed in the tree (id => true); used to
 *                            place every tweet once and to stop on cyclic data.
 * @return array Nested array: reply id => array of that reply's own replies.
 */
function build_reply_subtree ($id, $parents, &$visited) {
    $subtree = array();
    if (!isset($parents[$id])) {
        return $subtree;
    }

    foreach ($parents[$id] as $cid) {
        if (isset($visited[$cid])) {
            continue;
        }
        $visited[$cid] = true;
        $subtree[$cid] = build_reply_subtree($cid, $parents, $visited);
    }

    return $subtree;
}

/**
 * Writes data/<tweetid>/structure.json, the reply tree of a collected conversation.
 *
 * Every file in data/<tweetid>/reactions/ is read to learn which tweet it
 * replies to; the replies are then attached, at every depth, below the root
 * ids already present in the global $structure.
 *
 * @param string $tweetid Id of the source tweet (names the data directory).
 * @return void
 */
function create_structure($tweetid) {
    global $structure;

    $reactionsdir = "data/" . $tweetid . "/reactions/";

    // Map each parent id to the ids of the tweets that reply to it.
    $parents = array();
    $dir = @dir($reactionsdir);
    if ($dir) {
        while (($file = $dir->read()) !== false) {
            if ($file == "." || $file == "..") {
                continue;
            }

            $raw = file_get_contents($reactionsdir . $file);
            $tweet = is_string($raw) ? json_decode($raw) : null;
            if (!is_object($tweet)) {
                // Unreadable or non-JSON file: nothing to place in the tree.
                continue;
            }

            // Prefer the string id; the numeric id can lose precision when it
            // does not fit a native integer.
            if (isset($tweet->id_str)) {
                $id = (string) $tweet->id_str;
            }
            else if (isset($tweet->id)) {
                $id = (string) $tweet->id;
            }
            else {
                continue;
            }
            $inreplyto = isset($tweet->in_reply_to_status_id_str) ? (string) $tweet->in_reply_to_status_id_str : "";

            if (!isset($parents[$inreplyto])) {
                $parents[$inreplyto] = array();
            }
            array_push($parents[$inreplyto], $id);
        }
        $dir->close();
    }

    // Roots are never re-inserted as somebody's child.
    $visited = array();
    foreach ($structure as $sid => $substructure) {
        $visited[$sid] = true;
    }

    foreach ($structure as $sid => $substructure) {
        if (!is_array($structure[$sid])) {
            $structure[$sid] = array();
        }
        foreach (build_reply_subtree($sid, $parents, $visited) as $cid => $replies) {
            $structure[$sid][$cid] = $replies;
        }
    }

    file_put_contents("data/" . $tweetid . "/structure.json", json_encode($structure));
    chmod("data/" . $tweetid . "/structure.json", 0777);
}
|
122 |
|
|
123 |
// Entry point. Usage: php <this script> <tweet id | tweet URL>
if (!isset($argv[1])) {
    exit(0);
}
$tweetid = $argv[1];

// A tweet URL is accepted as well: keep only its last path segment. A trailing
// slash is dropped first so it does not produce an empty id.
if (strstr($tweetid, "/")) {
    $tweetid = explode("/", rtrim($tweetid, "/"));
    $tweetid = $tweetid[count($tweetid) - 1];
}

// The id ends up in a shell command and in file paths, so accept digits only.
if (!preg_match("/^[0-9]+$/", $tweetid)) {
    fwrite(STDERR, "Invalid tweet id: " . $argv[1] . "\n");
    exit(1);
}

// Globals shared with the functions above: the flat list of collected ids and
// the reply tree, rooted at the source tweet.
$replyingids = array();
$structure = array($tweetid => array());

$sourcetweet = @shell_exec("python retrieve.tweet.py " . escapeshellarg($tweetid));
$sourcetweetobj = is_string($sourcetweet) ? json_decode($sourcetweet) : null;
if (isset($sourcetweetobj->id_str)) {
    $username = $sourcetweetobj->user->screen_name;

    // Create the output directories, including a missing "data" parent.
    // NOTE(review): mode 0766 leaves a directory without the execute bit for
    // group/others while reactions/ is created with 0777 - confirm the intent.
    foreach (array("data/" . $tweetid, "data/" . $tweetid . "/source-tweets/") as $path) {
        if (!is_dir($path)) {
            @mkdir($path, 0777, true);
        }
        @chmod($path, 0766);
    }
    file_put_contents("data/" . $tweetid . "/source-tweets/" . $tweetid . ".json", $sourcetweet);

    collect_replying_tweets($tweetid, $username);

    create_structure($tweetid);
}
|
151 |
?> |