Permalink
Please sign in to comment.
Showing
with
3,809 additions
and 0 deletions.
- +201 −0 LICENSE
- +38 −0 README
- +151 −0 get.thread.php
- +17 −0 index.php
- +29 −0 retrieve.tweet.list.py
- +27 −0 retrieve.tweet.py
- +20 −0 tweepy/LICENSE
- +27 −0 tweepy/__init__.py
- BIN tweepy/__init__.pyc
- +751 −0 tweepy/api.py
- BIN tweepy/api.pyc
- +156 −0 tweepy/auth.py
- BIN tweepy/auth.pyc
- +213 −0 tweepy/binder.py
- BIN tweepy/binder.pyc
- +424 −0 tweepy/cache.py
- BIN tweepy/cache.pyc
- +171 −0 tweepy/cursor.py
- BIN tweepy/cursor.pyc
- +15 −0 tweepy/error.py
- BIN tweepy/error.pyc
- +433 −0 tweepy/models.py
- BIN tweepy/models.pyc
- +655 −0 tweepy/oauth.py
- BIN tweepy/oauth.pyc
- +97 −0 tweepy/parsers.py
- BIN tweepy/parsers.pyc
- +319 −0 tweepy/streaming.py
- BIN tweepy/streaming.pyc
- +60 −0 tweepy/utils.py
- BIN tweepy/utils.pyc
- +5 −0 twitter.ini
| @@ -0,0 +1,201 @@ | |||
| + Apache License | |||
| + Version 2.0, January 2004 | |||
| + http://www.apache.org/licenses/ | |||
| + | |||
| + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |||
| + | |||
| + 1. Definitions. | |||
| + | |||
| + "License" shall mean the terms and conditions for use, reproduction, | |||
| + and distribution as defined by Sections 1 through 9 of this document. | |||
| + | |||
| + "Licensor" shall mean the copyright owner or entity authorized by | |||
| + the copyright owner that is granting the License. | |||
| + | |||
| + "Legal Entity" shall mean the union of the acting entity and all | |||
| + other entities that control, are controlled by, or are under common | |||
| + control with that entity. For the purposes of this definition, | |||
| + "control" means (i) the power, direct or indirect, to cause the | |||
| + direction or management of such entity, whether by contract or | |||
| + otherwise, or (ii) ownership of fifty percent (50%) or more of the | |||
| + outstanding shares, or (iii) beneficial ownership of such entity. | |||
| + | |||
| + "You" (or "Your") shall mean an individual or Legal Entity | |||
| + exercising permissions granted by this License. | |||
| + | |||
| + "Source" form shall mean the preferred form for making modifications, | |||
| + including but not limited to software source code, documentation | |||
| + source, and configuration files. | |||
| + | |||
| + "Object" form shall mean any form resulting from mechanical | |||
| + transformation or translation of a Source form, including but | |||
| + not limited to compiled object code, generated documentation, | |||
| + and conversions to other media types. | |||
| + | |||
| + "Work" shall mean the work of authorship, whether in Source or | |||
| + Object form, made available under the License, as indicated by a | |||
| + copyright notice that is included in or attached to the work | |||
| + (an example is provided in the Appendix below). | |||
| + | |||
| + "Derivative Works" shall mean any work, whether in Source or Object | |||
| + form, that is based on (or derived from) the Work and for which the | |||
| + editorial revisions, annotations, elaborations, or other modifications | |||
| + represent, as a whole, an original work of authorship. For the purposes | |||
| + of this License, Derivative Works shall not include works that remain | |||
| + separable from, or merely link (or bind by name) to the interfaces of, | |||
| + the Work and Derivative Works thereof. | |||
| + | |||
| + "Contribution" shall mean any work of authorship, including | |||
| + the original version of the Work and any modifications or additions | |||
| + to that Work or Derivative Works thereof, that is intentionally | |||
| + submitted to Licensor for inclusion in the Work by the copyright owner | |||
| + or by an individual or Legal Entity authorized to submit on behalf of | |||
| + the copyright owner. For the purposes of this definition, "submitted" | |||
| + means any form of electronic, verbal, or written communication sent | |||
| + to the Licensor or its representatives, including but not limited to | |||
| + communication on electronic mailing lists, source code control systems, | |||
| + and issue tracking systems that are managed by, or on behalf of, the | |||
| + Licensor for the purpose of discussing and improving the Work, but | |||
| + excluding communication that is conspicuously marked or otherwise | |||
| + designated in writing by the copyright owner as "Not a Contribution." | |||
| + | |||
| + "Contributor" shall mean Licensor and any individual or Legal Entity | |||
| + on behalf of whom a Contribution has been received by Licensor and | |||
| + subsequently incorporated within the Work. | |||
| + | |||
| + 2. Grant of Copyright License. Subject to the terms and conditions of | |||
| + this License, each Contributor hereby grants to You a perpetual, | |||
| + worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| + copyright license to reproduce, prepare Derivative Works of, | |||
| + publicly display, publicly perform, sublicense, and distribute the | |||
| + Work and such Derivative Works in Source or Object form. | |||
| + | |||
| + 3. Grant of Patent License. Subject to the terms and conditions of | |||
| + this License, each Contributor hereby grants to You a perpetual, | |||
| + worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| + (except as stated in this section) patent license to make, have made, | |||
| + use, offer to sell, sell, import, and otherwise transfer the Work, | |||
| + where such license applies only to those patent claims licensable | |||
| + by such Contributor that are necessarily infringed by their | |||
| + Contribution(s) alone or by combination of their Contribution(s) | |||
| + with the Work to which such Contribution(s) was submitted. If You | |||
| + institute patent litigation against any entity (including a | |||
| + cross-claim or counterclaim in a lawsuit) alleging that the Work | |||
| + or a Contribution incorporated within the Work constitutes direct | |||
| + or contributory patent infringement, then any patent licenses | |||
| + granted to You under this License for that Work shall terminate | |||
| + as of the date such litigation is filed. | |||
| + | |||
| + 4. Redistribution. You may reproduce and distribute copies of the | |||
| + Work or Derivative Works thereof in any medium, with or without | |||
| + modifications, and in Source or Object form, provided that You | |||
| + meet the following conditions: | |||
| + | |||
| + (a) You must give any other recipients of the Work or | |||
| + Derivative Works a copy of this License; and | |||
| + | |||
| + (b) You must cause any modified files to carry prominent notices | |||
| + stating that You changed the files; and | |||
| + | |||
| + (c) You must retain, in the Source form of any Derivative Works | |||
| + that You distribute, all copyright, patent, trademark, and | |||
| + attribution notices from the Source form of the Work, | |||
| + excluding those notices that do not pertain to any part of | |||
| + the Derivative Works; and | |||
| + | |||
| + (d) If the Work includes a "NOTICE" text file as part of its | |||
| + distribution, then any Derivative Works that You distribute must | |||
| + include a readable copy of the attribution notices contained | |||
| + within such NOTICE file, excluding those notices that do not | |||
| + pertain to any part of the Derivative Works, in at least one | |||
| + of the following places: within a NOTICE text file distributed | |||
| + as part of the Derivative Works; within the Source form or | |||
| + documentation, if provided along with the Derivative Works; or, | |||
| + within a display generated by the Derivative Works, if and | |||
| + wherever such third-party notices normally appear. The contents | |||
| + of the NOTICE file are for informational purposes only and | |||
| + do not modify the License. You may add Your own attribution | |||
| + notices within Derivative Works that You distribute, alongside | |||
| + or as an addendum to the NOTICE text from the Work, provided | |||
| + that such additional attribution notices cannot be construed | |||
| + as modifying the License. | |||
| + | |||
| + You may add Your own copyright statement to Your modifications and | |||
| + may provide additional or different license terms and conditions | |||
| + for use, reproduction, or distribution of Your modifications, or | |||
| + for any such Derivative Works as a whole, provided Your use, | |||
| + reproduction, and distribution of the Work otherwise complies with | |||
| + the conditions stated in this License. | |||
| + | |||
| + 5. Submission of Contributions. Unless You explicitly state otherwise, | |||
| + any Contribution intentionally submitted for inclusion in the Work | |||
| + by You to the Licensor shall be under the terms and conditions of | |||
| + this License, without any additional terms or conditions. | |||
| + Notwithstanding the above, nothing herein shall supersede or modify | |||
| + the terms of any separate license agreement you may have executed | |||
| + with Licensor regarding such Contributions. | |||
| + | |||
| + 6. Trademarks. This License does not grant permission to use the trade | |||
| + names, trademarks, service marks, or product names of the Licensor, | |||
| + except as required for reasonable and customary use in describing the | |||
| + origin of the Work and reproducing the content of the NOTICE file. | |||
| + | |||
| + 7. Disclaimer of Warranty. Unless required by applicable law or | |||
| + agreed to in writing, Licensor provides the Work (and each | |||
| + Contributor provides its Contributions) on an "AS IS" BASIS, | |||
| + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| + implied, including, without limitation, any warranties or conditions | |||
| + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |||
| + PARTICULAR PURPOSE. You are solely responsible for determining the | |||
| + appropriateness of using or redistributing the Work and assume any | |||
| + risks associated with Your exercise of permissions under this License. | |||
| + | |||
| + 8. Limitation of Liability. In no event and under no legal theory, | |||
| + whether in tort (including negligence), contract, or otherwise, | |||
| + unless required by applicable law (such as deliberate and grossly | |||
| + negligent acts) or agreed to in writing, shall any Contributor be | |||
| + liable to You for damages, including any direct, indirect, special, | |||
| + incidental, or consequential damages of any character arising as a | |||
| + result of this License or out of the use or inability to use the | |||
| + Work (including but not limited to damages for loss of goodwill, | |||
| + work stoppage, computer failure or malfunction, or any and all | |||
| + other commercial damages or losses), even if such Contributor | |||
| + has been advised of the possibility of such damages. | |||
| + | |||
| + 9. Accepting Warranty or Additional Liability. While redistributing | |||
| + the Work or Derivative Works thereof, You may choose to offer, | |||
| + and charge a fee for, acceptance of support, warranty, indemnity, | |||
| + or other liability obligations and/or rights consistent with this | |||
| + License. However, in accepting such obligations, You may act only | |||
| + on Your own behalf and on Your sole responsibility, not on behalf | |||
| + of any other Contributor, and only if You agree to indemnify, | |||
| + defend, and hold each Contributor harmless for any liability | |||
| + incurred by, or claims asserted against, such Contributor by reason | |||
| + of your accepting any such warranty or additional liability. | |||
| + | |||
| + END OF TERMS AND CONDITIONS | |||
| + | |||
| + APPENDIX: How to apply the Apache License to your work. | |||
| + | |||
| + To apply the Apache License to your work, attach the following | |||
| + boilerplate notice, with the fields enclosed by brackets "{}" | |||
| + replaced with your own identifying information. (Don't include | |||
| + the brackets!) The text should be enclosed in the appropriate | |||
| + comment syntax for the file format. We also recommend that a | |||
| + file or class name and description of purpose be included on the | |||
| + same "printed page" as the copyright notice for easier | |||
| + identification within third-party archives. | |||
| + | |||
| + Copyright {yyyy} {name of copyright owner} | |||
| + | |||
| + Licensed under the Apache License, Version 2.0 (the "License"); | |||
| + you may not use this file except in compliance with the License. | |||
| + You may obtain a copy of the License at | |||
| + | |||
| + http://www.apache.org/licenses/LICENSE-2.0 | |||
| + | |||
| + Unless required by applicable law or agreed to in writing, software | |||
| + distributed under the License is distributed on an "AS IS" BASIS, | |||
| + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| + See the License for the specific language governing permissions and | |||
| + limitations under the License. | |||
| @@ -0,0 +1,38 @@ | |||
| +The PHEME conversation collection script allows the user to collect the set of tweets replying to a specific tweet, forming a conversation or a thread. The user needs to specify a single source tweet, providing its URL or tweet ID, and the tool collects the replies and stores them in the 'data/tweet-id' directory. | |||
| + | |||
| +NOTE: Since retrieval of replying tweets has been discontinued in Twitter API v1.1 (it used to be available through the 'related_results/show' endpoint in v1.0), this script scrapes the replies from the HTML of the source tweet. | |||
| + | |||
| +----- | |||
| +SETUP | |||
| +----- | |||
| + | |||
| +The script is developed using PHP (it was initially intended to run as a web service) and Python (for the back-end to access the Twitter API using the Tweepy library). It is ready to run on a system with PHP (e.g., the 'php5-cli' package on UNIX systems, or the equivalent PHP CLI for Windows), Python and wget installed. However, you do need to define a few settings in order to be able to access Twitter.com and the API. There are basically two steps: | |||
| + | |||
| +1. Create a cookie file to enable web access to replies: | |||
| + | |||
| +This can be done using Mozilla Firefox and the add-on Cookies Export/Import, which can be installed from: | |||
| +https://addons.mozilla.org/en-US/firefox/addon/cookies-exportimport/ | |||
| + | |||
| +It is recommended to delete all existing cookies in Firefox before proceeding. Then, you should log in to Twitter.com with some user account. After logging in, click on 'Tools -> Export cookies', and save the file with the name 'cookies.txt'. This file should be copied into the main directory of this script. | |||
| + | |||
| +2. Add Twitter API credentials to twitter.ini: | |||
| + | |||
| +This is the Twitter API configuration file. You will see four lines there where you can specify the Twitter API credentials. | |||
| + | |||
| +------------- | |||
| +HOW TO RUN IT | |||
| +------------- | |||
| + | |||
| +Once you have the ID of the tweet that you want to get the conversation for, you can run the following command: | |||
| + | |||
| +php get.thread.php tweet-id | |||
| + | |||
| +Where tweet-id is the ID of the tweet that you are interested in. If successful, the script will output the number of replies that have been collected, and a new directory will be created in the 'data' folder. | |||
| + | |||
| +--------- | |||
| +REFERENCE | |||
| +--------- | |||
| + | |||
| +This conversation collection script was used for the following paper: | |||
| + | |||
| +Arkaitz Zubiaga, Geraldine Wong Sak Hoi, Maria Liakata, Rob Procter, Peter Tolmie. Analysing How People Orient to and Spread Rumours in Social Media by Looking at Conversational Threads. arXiv. 2015. | |||
151
get.thread.php
| @@ -0,0 +1,151 @@ | |||
| +<?php | |||
| +ini_set("memory_limit", "2048M"); | |||
| + | |||
/**
 * Recursively collect the IDs of all tweets in the conversation below a tweet.
 *
 * The tweet's page on twitter.com is downloaded with wget (using the cookies in
 * ./cookies.txt), the permalinks of the replies are extracted from the HTML,
 * and the function recurses into every reply that has not been seen yet.
 * Further pages of the conversation are requested through the "max_position"
 * cursor until the response no longer advertises one.
 *
 * Newly found IDs are appended to the global $replyingids list.
 *
 * @param string $tweetid  ID of the tweet whose replies are wanted.
 * @param string $username Screen name of the author of that tweet.
 * @return void
 */
function get_replying_ids ($tweetid, $username) {
    global $replyingids;

    $maxposition = "";
    // Cursors already requested for this tweet. Without this guard a response
    // that keeps echoing the same cursor would make the loop run forever.
    $seenpositions = array();

    do {
        if ($maxposition === "") {
            $url = "https://twitter.com/" . $username . "/status/" . $tweetid;
        }
        else {
            $url = "https://twitter.com/i/" . $username . "/conversation/" . $tweetid . "?include_available_features=1&include_entities=1&max_position=" . $maxposition;
        }

        // The URL contains scraped data (user names, cursors), so it must be
        // escaped before being handed to the shell.
        $content = shell_exec("wget " . escapeshellarg($url) . " -q --load-cookies=./cookies.txt -O -");
        if (!is_string($content) || $content === "") {
            // Download failed or returned nothing: there is nothing to parse
            // and no cursor to follow.
            break;
        }

        // Conversation pages arrive as JSON-escaped HTML; undo the escaping so
        // the same regular expressions work for both kinds of response.
        $content = html_entity_decode(str_replace("\\n", "\n", $content));
        $content = str_replace(
            array("\\u003c", "\\u003e", "\\/", "\\\""),
            array("<", ">", "/", "\""),
            $content
        );

        if (preg_match_all('|<a href="(/[^/]*/status/[0-9]*)" class="tweet-timestamp js-permalink js-nav js-tooltip"|U', $content, $reptweets)) {
            foreach ($reptweets[1] as $reptweet) {
                // Permalinks look like "/<username>/status/<id>".
                $reptweettokens = explode("/", $reptweet);
                $repusername = $reptweettokens[1];
                $reptweetid = $reptweettokens[count($reptweettokens) - 1];

                // The pattern also matches a permalink with no digits at all.
                if ($reptweetid === "") {
                    continue;
                }

                if (!in_array($reptweetid, $replyingids, true)) {
                    $replyingids[] = $reptweetid;
                    get_replying_ids($reptweetid, $repusername);
                }
            }
        }

        $maxposition = "";
        if (preg_match('|data-min-position="([^"]*)"|U', $content, $mp) || preg_match('|"min_position":"([^"]*)"|U', $content, $mp)) {
            $maxposition = $mp[1];
        }

        if ($maxposition !== "") {
            if (isset($seenpositions[$maxposition])) {
                break;
            }
            $seenpositions[$maxposition] = true;
        }
    } while ($maxposition !== "");
}
| + | |||
/**
 * Attach a tweet to the conversation tree underneath the tweet it replies to.
 *
 * The tree is a nested map of the form id => (child id => (...)), with an
 * empty array marking a tweet that has no replies. When called with two
 * arguments the search starts at the root of the global $structure.
 *
 * @param string     $tweetid   ID of the tweet to insert.
 * @param string     $inreplyto ID of the tweet it replies to.
 * @param array|null $node      Subtree to search; used by the recursion, callers
 *                              normally omit it.
 * @return bool True when the parent was found and the tweet attached.
 */
function add_to_structure ($tweetid, $inreplyto, &$node = null) {
    if ($node === null) {
        global $structure;
        $node = &$structure;
    }

    if (!is_array($node)) {
        return false;
    }

    foreach (array_keys($node) as $id) {
        // Numeric string keys are stored as integers by PHP, so compare the
        // string forms rather than relying on loose comparison.
        if ((string) $id === (string) $inreplyto) {
            if (!is_array($node[$id])) {
                $node[$id] = array();
            }
            $node[$id][$tweetid] = array();
            return true;
        }

        // Descend into this child's own subtree (not the root again).
        if (is_array($node[$id]) && add_to_structure($tweetid, $inreplyto, $node[$id])) {
            return true;
        }
    }

    return false;
}
| + | |||
/**
 * Download every tweet of the conversation below a source tweet.
 *
 * The reply IDs are gathered with get_replying_ids() and then passed, in
 * batches of at most 100 IDs, to retrieve.tweet.list.py. That script is
 * expected to print one JSON document per line; each line that decodes to an
 * object with an "id_str" is stored as data/<tweetid>/reactions/<id_str>.json.
 *
 * When the script was started with a command-line argument, a one-line summary
 * is printed at the end.
 *
 * @param string $tweetid  ID of the source tweet.
 * @param string $username Screen name of the author of the source tweet.
 * @return void
 */
function collect_replying_tweets ($tweetid, $username) {
    global $argv, $replyingids;
    $replycount = 0;

    $reactionsdir = "data/" . $tweetid . "/reactions/";
    if (!is_dir($reactionsdir)) {
        // Recursive, so a missing parent directory does not make this fail.
        mkdir($reactionsdir, 0777, true);
    }
    // Best effort: mkdir() is subject to the umask, and chmod() may be refused
    // when the directory belongs to another user.
    @chmod($reactionsdir, 0777);

    get_replying_ids($tweetid, $username);

    $allids = is_array($replyingids) ? $replyingids : array();
    foreach (array_chunk($allids, 100) as $batch) {
        $output = shell_exec("python retrieve.tweet.list.py " . escapeshellarg(implode(",", $batch)));
        if (!is_string($output)) {
            // The helper produced no output for this batch.
            continue;
        }

        foreach (explode("\n", $output) as $tweet) {
            $tweetobj = json_decode($tweet);
            // Only purely numeric IDs are accepted, since the ID becomes part
            // of a file name.
            if (is_object($tweetobj) && isset($tweetobj->id_str) && preg_match('/^[0-9]+$/', (string) $tweetobj->id_str)) {
                file_put_contents($reactionsdir . $tweetobj->id_str . ".json", $tweet);
                $replycount++;
            }
        }
    }

    if (isset($argv[1])) {
        echo $tweetid . " - source tweet and " . $replycount . " replies collected.\n";
    }
}
| + | |||
/**
 * Build the reply tree of a conversation and write it to structure.json.
 *
 * Every file in data/<tweetid>/reactions/ is read as a tweet in JSON form. The
 * tweets are grouped by the tweet they reply to, and the global $structure
 * (whose top-level keys are the conversation roots) is filled in level by
 * level, so replies to replies are nested below their parent. The result is
 * saved as data/<tweetid>/structure.json.
 *
 * @param string $tweetid ID of the source tweet of the conversation.
 * @return void
 */
function create_structure($tweetid) {
    global $structure;

    $reactionsdir = "data/" . $tweetid . "/reactions/";

    // Map of parent tweet ID => list of IDs of its direct replies.
    $parents = array();
    $files = is_dir($reactionsdir) ? scandir($reactionsdir) : false;
    if ($files === false) {
        $files = array();
    }

    foreach ($files as $file) {
        if ($file == "." || $file == "..") {
            continue;
        }

        $raw = file_get_contents($reactionsdir . $file);
        $tweet = ($raw === false) ? null : json_decode($raw);
        // Skip unreadable files and tweets that are not replies.
        if (!is_object($tweet) || !isset($tweet->in_reply_to_status_id_str)) {
            continue;
        }

        // Prefer the string ID: the numeric "id" can lose precision when JSON
        // integers do not fit PHP's native integer type.
        if (isset($tweet->id_str)) {
            $id = (string) $tweet->id_str;
        }
        elseif (isset($tweet->id)) {
            $id = (string) $tweet->id;
        }
        else {
            continue;
        }

        $parents[$tweet->in_reply_to_status_id_str][] = $id;
    }

    if (!is_array($structure)) {
        $structure = array($tweetid => array());
    }

    foreach (array_keys($structure) as $sid) {
        if (!is_array($structure[$sid])) {
            $structure[$sid] = array();
        }
        $visited = array($sid => true);
        foreach (build_reply_subtree($parents, $sid, $visited) as $cid => $subtree) {
            $structure[$sid][$cid] = $subtree;
        }
    }

    file_put_contents("data/" . $tweetid . "/structure.json", json_encode($structure));
    chmod("data/" . $tweetid . "/structure.json", 0777);
}

/**
 * Build the nested map of replies below one tweet.
 *
 * @param array  $parents Map of parent tweet ID => list of direct reply IDs.
 * @param string $id      ID of the tweet whose replies are wanted.
 * @param array  $visited IDs already placed in the tree (ID => true); prevents
 *                        endless recursion on inconsistent data.
 * @return array Map of reply ID => subtree of that reply.
 */
function build_reply_subtree(array $parents, $id, array &$visited) {
    $subtree = array();
    if (!isset($parents[$id])) {
        return $subtree;
    }

    foreach ($parents[$id] as $cid) {
        if (isset($visited[$cid])) {
            continue;
        }
        $visited[$cid] = true;
        $subtree[$cid] = build_reply_subtree($parents, $cid, $visited);
    }

    return $subtree;
}
| + | |||
// ---------------------------------------------------------------------------
// Entry point: php get.thread.php <tweet-id | tweet-url>
//
// Retrieves the source tweet through retrieve.tweet.py, stores it under
// data/<tweet-id>/source-tweets/, collects all replies and finally writes the
// reply tree to data/<tweet-id>/structure.json.
// ---------------------------------------------------------------------------
if (!isset($argv[1])) {
    file_put_contents("php://stderr", "Usage: php get.thread.php <tweet-id | tweet-url>\n");
    exit(1);
}
$tweetid = $argv[1];

// A full tweet URL is accepted as well: drop any query string or fragment and
// a trailing slash, then keep the last path segment.
if (strstr($tweetid, "/")) {
    $tweetid = preg_replace('/[?#].*$/', '', $tweetid);
    $tweetid = explode("/", rtrim($tweetid, "/"));
    $tweetid = $tweetid[count($tweetid) - 1];
}

// The ID ends up in shell commands and file paths, so only digits are allowed.
if (!preg_match('/^[0-9]+$/', $tweetid)) {
    file_put_contents("php://stderr", "Invalid tweet ID: " . $tweetid . "\n");
    exit(1);
}

$replyingids = array();
$structure = array($tweetid => array());

$sourcetweet = shell_exec("python retrieve.tweet.py " . escapeshellarg($tweetid));
$sourcetweetobj = is_string($sourcetweet) ? json_decode($sourcetweet) : null;
if (is_object($sourcetweetobj) && isset($sourcetweetobj->id_str) && isset($sourcetweetobj->user->screen_name)) {
    $username = $sourcetweetobj->user->screen_name;

    // NOTE(review): 0766 leaves group/others without the execute bit needed to
    // enter these directories, while the reactions directory uses 0777 -
    // confirm which mode is intended.
    $sourcedir = "data/" . $tweetid . "/source-tweets/";
    if (!is_dir($sourcedir)) {
        // Recursive, so "data/" and "data/<tweet-id>" are created as needed.
        mkdir($sourcedir, 0777, true);
    }
    @chmod("data/" . $tweetid, 0766);
    @chmod($sourcedir, 0766);
    file_put_contents($sourcedir . $tweetid . ".json", $sourcetweet);

    collect_replying_tweets($tweetid, $username);

    create_structure($tweetid);
}
else {
    file_put_contents("php://stderr", "Could not retrieve source tweet " . $tweetid . ".\n");
    exit(1);
}
| +?> | |||
Oops, something went wrong.
0 comments on commit
797f0d4