Permalink
Please sign in to comment.
Showing
with
3,809 additions
and 0 deletions.
- +201 −0 LICENSE
- +38 −0 README
- +151 −0 get.thread.php
- +17 −0 index.php
- +29 −0 retrieve.tweet.list.py
- +27 −0 retrieve.tweet.py
- +20 −0 tweepy/LICENSE
- +27 −0 tweepy/__init__.py
- BIN tweepy/__init__.pyc
- +751 −0 tweepy/api.py
- BIN tweepy/api.pyc
- +156 −0 tweepy/auth.py
- BIN tweepy/auth.pyc
- +213 −0 tweepy/binder.py
- BIN tweepy/binder.pyc
- +424 −0 tweepy/cache.py
- BIN tweepy/cache.pyc
- +171 −0 tweepy/cursor.py
- BIN tweepy/cursor.pyc
- +15 −0 tweepy/error.py
- BIN tweepy/error.pyc
- +433 −0 tweepy/models.py
- BIN tweepy/models.pyc
- +655 −0 tweepy/oauth.py
- BIN tweepy/oauth.pyc
- +97 −0 tweepy/parsers.py
- BIN tweepy/parsers.pyc
- +319 −0 tweepy/streaming.py
- BIN tweepy/streaming.pyc
- +60 −0 tweepy/utils.py
- BIN tweepy/utils.pyc
- +5 −0 twitter.ini
| @@ -0,0 +1,201 @@ | ||
| + Apache License | ||
| + Version 2.0, January 2004 | ||
| + http://www.apache.org/licenses/ | ||
| + | ||
| + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | ||
| + | ||
| + 1. Definitions. | ||
| + | ||
| + "License" shall mean the terms and conditions for use, reproduction, | ||
| + and distribution as defined by Sections 1 through 9 of this document. | ||
| + | ||
| + "Licensor" shall mean the copyright owner or entity authorized by | ||
| + the copyright owner that is granting the License. | ||
| + | ||
| + "Legal Entity" shall mean the union of the acting entity and all | ||
| + other entities that control, are controlled by, or are under common | ||
| + control with that entity. For the purposes of this definition, | ||
| + "control" means (i) the power, direct or indirect, to cause the | ||
| + direction or management of such entity, whether by contract or | ||
| + otherwise, or (ii) ownership of fifty percent (50%) or more of the | ||
| + outstanding shares, or (iii) beneficial ownership of such entity. | ||
| + | ||
| + "You" (or "Your") shall mean an individual or Legal Entity | ||
| + exercising permissions granted by this License. | ||
| + | ||
| + "Source" form shall mean the preferred form for making modifications, | ||
| + including but not limited to software source code, documentation | ||
| + source, and configuration files. | ||
| + | ||
| + "Object" form shall mean any form resulting from mechanical | ||
| + transformation or translation of a Source form, including but | ||
| + not limited to compiled object code, generated documentation, | ||
| + and conversions to other media types. | ||
| + | ||
| + "Work" shall mean the work of authorship, whether in Source or | ||
| + Object form, made available under the License, as indicated by a | ||
| + copyright notice that is included in or attached to the work | ||
| + (an example is provided in the Appendix below). | ||
| + | ||
| + "Derivative Works" shall mean any work, whether in Source or Object | ||
| + form, that is based on (or derived from) the Work and for which the | ||
| + editorial revisions, annotations, elaborations, or other modifications | ||
| + represent, as a whole, an original work of authorship. For the purposes | ||
| + of this License, Derivative Works shall not include works that remain | ||
| + separable from, or merely link (or bind by name) to the interfaces of, | ||
| + the Work and Derivative Works thereof. | ||
| + | ||
| + "Contribution" shall mean any work of authorship, including | ||
| + the original version of the Work and any modifications or additions | ||
| + to that Work or Derivative Works thereof, that is intentionally | ||
| + submitted to Licensor for inclusion in the Work by the copyright owner | ||
| + or by an individual or Legal Entity authorized to submit on behalf of | ||
| + the copyright owner. For the purposes of this definition, "submitted" | ||
| + means any form of electronic, verbal, or written communication sent | ||
| + to the Licensor or its representatives, including but not limited to | ||
| + communication on electronic mailing lists, source code control systems, | ||
| + and issue tracking systems that are managed by, or on behalf of, the | ||
| + Licensor for the purpose of discussing and improving the Work, but | ||
| + excluding communication that is conspicuously marked or otherwise | ||
| + designated in writing by the copyright owner as "Not a Contribution." | ||
| + | ||
| + "Contributor" shall mean Licensor and any individual or Legal Entity | ||
| + on behalf of whom a Contribution has been received by Licensor and | ||
| + subsequently incorporated within the Work. | ||
| + | ||
| + 2. Grant of Copyright License. Subject to the terms and conditions of | ||
| + this License, each Contributor hereby grants to You a perpetual, | ||
| + worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| + copyright license to reproduce, prepare Derivative Works of, | ||
| + publicly display, publicly perform, sublicense, and distribute the | ||
| + Work and such Derivative Works in Source or Object form. | ||
| + | ||
| + 3. Grant of Patent License. Subject to the terms and conditions of | ||
| + this License, each Contributor hereby grants to You a perpetual, | ||
| + worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| + (except as stated in this section) patent license to make, have made, | ||
| + use, offer to sell, sell, import, and otherwise transfer the Work, | ||
| + where such license applies only to those patent claims licensable | ||
| + by such Contributor that are necessarily infringed by their | ||
| + Contribution(s) alone or by combination of their Contribution(s) | ||
| + with the Work to which such Contribution(s) was submitted. If You | ||
| + institute patent litigation against any entity (including a | ||
| + cross-claim or counterclaim in a lawsuit) alleging that the Work | ||
| + or a Contribution incorporated within the Work constitutes direct | ||
| + or contributory patent infringement, then any patent licenses | ||
| + granted to You under this License for that Work shall terminate | ||
| + as of the date such litigation is filed. | ||
| + | ||
| + 4. Redistribution. You may reproduce and distribute copies of the | ||
| + Work or Derivative Works thereof in any medium, with or without | ||
| + modifications, and in Source or Object form, provided that You | ||
| + meet the following conditions: | ||
| + | ||
| + (a) You must give any other recipients of the Work or | ||
| + Derivative Works a copy of this License; and | ||
| + | ||
| + (b) You must cause any modified files to carry prominent notices | ||
| + stating that You changed the files; and | ||
| + | ||
| + (c) You must retain, in the Source form of any Derivative Works | ||
| + that You distribute, all copyright, patent, trademark, and | ||
| + attribution notices from the Source form of the Work, | ||
| + excluding those notices that do not pertain to any part of | ||
| + the Derivative Works; and | ||
| + | ||
| + (d) If the Work includes a "NOTICE" text file as part of its | ||
| + distribution, then any Derivative Works that You distribute must | ||
| + include a readable copy of the attribution notices contained | ||
| + within such NOTICE file, excluding those notices that do not | ||
| + pertain to any part of the Derivative Works, in at least one | ||
| + of the following places: within a NOTICE text file distributed | ||
| + as part of the Derivative Works; within the Source form or | ||
| + documentation, if provided along with the Derivative Works; or, | ||
| + within a display generated by the Derivative Works, if and | ||
| + wherever such third-party notices normally appear. The contents | ||
| + of the NOTICE file are for informational purposes only and | ||
| + do not modify the License. You may add Your own attribution | ||
| + notices within Derivative Works that You distribute, alongside | ||
| + or as an addendum to the NOTICE text from the Work, provided | ||
| + that such additional attribution notices cannot be construed | ||
| + as modifying the License. | ||
| + | ||
| + You may add Your own copyright statement to Your modifications and | ||
| + may provide additional or different license terms and conditions | ||
| + for use, reproduction, or distribution of Your modifications, or | ||
| + for any such Derivative Works as a whole, provided Your use, | ||
| + reproduction, and distribution of the Work otherwise complies with | ||
| + the conditions stated in this License. | ||
| + | ||
| + 5. Submission of Contributions. Unless You explicitly state otherwise, | ||
| + any Contribution intentionally submitted for inclusion in the Work | ||
| + by You to the Licensor shall be under the terms and conditions of | ||
| + this License, without any additional terms or conditions. | ||
| + Notwithstanding the above, nothing herein shall supersede or modify | ||
| + the terms of any separate license agreement you may have executed | ||
| + with Licensor regarding such Contributions. | ||
| + | ||
| + 6. Trademarks. This License does not grant permission to use the trade | ||
| + names, trademarks, service marks, or product names of the Licensor, | ||
| + except as required for reasonable and customary use in describing the | ||
| + origin of the Work and reproducing the content of the NOTICE file. | ||
| + | ||
| + 7. Disclaimer of Warranty. Unless required by applicable law or | ||
| + agreed to in writing, Licensor provides the Work (and each | ||
| + Contributor provides its Contributions) on an "AS IS" BASIS, | ||
| + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
| + implied, including, without limitation, any warranties or conditions | ||
| + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | ||
| + PARTICULAR PURPOSE. You are solely responsible for determining the | ||
| + appropriateness of using or redistributing the Work and assume any | ||
| + risks associated with Your exercise of permissions under this License. | ||
| + | ||
| + 8. Limitation of Liability. In no event and under no legal theory, | ||
| + whether in tort (including negligence), contract, or otherwise, | ||
| + unless required by applicable law (such as deliberate and grossly | ||
| + negligent acts) or agreed to in writing, shall any Contributor be | ||
| + liable to You for damages, including any direct, indirect, special, | ||
| + incidental, or consequential damages of any character arising as a | ||
| + result of this License or out of the use or inability to use the | ||
| + Work (including but not limited to damages for loss of goodwill, | ||
| + work stoppage, computer failure or malfunction, or any and all | ||
| + other commercial damages or losses), even if such Contributor | ||
| + has been advised of the possibility of such damages. | ||
| + | ||
| + 9. Accepting Warranty or Additional Liability. While redistributing | ||
| + the Work or Derivative Works thereof, You may choose to offer, | ||
| + and charge a fee for, acceptance of support, warranty, indemnity, | ||
| + or other liability obligations and/or rights consistent with this | ||
| + License. However, in accepting such obligations, You may act only | ||
| + on Your own behalf and on Your sole responsibility, not on behalf | ||
| + of any other Contributor, and only if You agree to indemnify, | ||
| + defend, and hold each Contributor harmless for any liability | ||
| + incurred by, or claims asserted against, such Contributor by reason | ||
| + of your accepting any such warranty or additional liability. | ||
| + | ||
| + END OF TERMS AND CONDITIONS | ||
| + | ||
| + APPENDIX: How to apply the Apache License to your work. | ||
| + | ||
| + To apply the Apache License to your work, attach the following | ||
| + boilerplate notice, with the fields enclosed by brackets "{}" | ||
| + replaced with your own identifying information. (Don't include | ||
| + the brackets!) The text should be enclosed in the appropriate | ||
| + comment syntax for the file format. We also recommend that a | ||
| + file or class name and description of purpose be included on the | ||
| + same "printed page" as the copyright notice for easier | ||
| + identification within third-party archives. | ||
| + | ||
| + Copyright {yyyy} {name of copyright owner} | ||
| + | ||
| + Licensed under the Apache License, Version 2.0 (the "License"); | ||
| + you may not use this file except in compliance with the License. | ||
| + You may obtain a copy of the License at | ||
| + | ||
| + http://www.apache.org/licenses/LICENSE-2.0 | ||
| + | ||
| + Unless required by applicable law or agreed to in writing, software | ||
| + distributed under the License is distributed on an "AS IS" BASIS, | ||
| + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| + See the License for the specific language governing permissions and | ||
| + limitations under the License. |
| @@ -0,0 +1,38 @@ | ||
| +The PHEME conversation collection script allows the user to collect the set of tweets replying to a specific tweet, forming a conversation or a thread. The user needs to specify a single source tweet, providing its URL or tweet ID, and the tool collects the replies and stores them in the 'data/tweet-id' directory. | ||
| + | ||
| +NOTE: Since retrieval of replying tweets has been discontinued in Twitter API v1.1 (it used to be available through the 'related_results/show' endpoint in v1.0), this script scrapes the replies from the HTML of the source tweet. | ||
| + | ||
| +----- | ||
| +SETUP | ||
| +----- | ||
| + | ||
| +The script is developed using PHP (it was initially intended to run as a web service) and Python (for the back-end to access the Twitter API using the Tweepy library). It is ready to run on a system with the PHP command-line interpreter installed (i.e., the 'php5-cli' package on UNIX systems, or the equivalent PHP CLI for Windows); the 'python' and 'wget' commands must also be available, because get.thread.php invokes both. However, you need to define a few settings so that the script can access Twitter.com and the API. There are two steps: | ||
| + | ||
| +1. Create a cookie file to enable web access to replies: | ||
| + | ||
| +This can be done using Mozilla Firefox and the add-on Cookies Export/Import, which can be installed from: | ||
| +https://addons.mozilla.org/en-US/firefox/addon/cookies-exportimport/ | ||
| + | ||
| +It is recommended to delete all existing cookies in Firefox before proceeding. Then, log in to Twitter.com with a user account. After logging in, click on 'Tools -> Export cookies', and save the file with the name 'cookies.txt'. This file should be copied into the main directory of this script. | ||
| + | ||
| +2. Add Twitter API credentials to twitter.ini: | ||
| + | ||
| +This is the Twitter API configuration file. You will see four lines there where you can specify the Twitter API credentials. | ||
| + | ||
| +------------- | ||
| +HOW TO RUN IT | ||
| +------------- | ||
| + | ||
| +Once you have the ID of the tweet that you want to get the conversation for, you can run the following command: | ||
| + | ||
| +php get.thread.php tweet-id | ||
| + | ||
| +Where tweet-id is the ID of the tweet that you are interested in. If successful, the script will output the number of replies that have been collected, and a new directory will be created in the 'data' folder. | ||
| + | ||
| +--------- | ||
| +REFERENCE | ||
| +--------- | ||
| + | ||
| +This conversation collection script was used for the following paper: | ||
| + | ||
| +Arkaitz Zubiaga, Geraldine Wong Sak Hoi, Maria Liakata, Rob Procter, Peter Tolmie. Analysing How People Orient to and Spread Rumours in Social Media by Looking at Conversational Threads. arXiv. 2015. |
151
get.thread.php
| @@ -0,0 +1,151 @@ | ||
| +<?php | ||
// Raise PHP's memory limit to 2048M for this run of the script.
ini_set('memory_limit', '2048M');
| + | ||
/**
 * Recursively collect the IDs of the tweets found in the conversation of a tweet.
 *
 * The tweet page is downloaded with wget (using the cookies in ./cookies.txt),
 * and every tweet permalink found in the returned markup is appended to the
 * global $replyingids list and then crawled in turn. Further pages of the same
 * conversation are requested for as long as the response exposes a
 * "min position" cursor.
 *
 * @param string $tweetid  ID of the tweet whose conversation is crawled.
 * @param string $username Screen name used to build the tweet URL.
 * @return void The result is accumulated in the global $replyingids.
 */
function get_replying_ids ($tweetid, $username) {
    global $replyingids;

    if (!is_array($replyingids)) {
        $replyingids = array();
    }

    $maxposition = "";
    // Cursors already requested in this call; used to stop if the server
    // keeps handing back the same cursor, which would otherwise loop forever.
    $seenpositions = array();

    do {
        if ($maxposition === "") {
            $url = "https://twitter.com/" . $username . "/status/" . $tweetid;
        }
        else {
            $url = "https://twitter.com/i/" . $username . "/conversation/" . $tweetid . "?include_available_features=1&include_entities=1&max_position=" . $maxposition;
        }

        // The URL contains values taken from the command line and from scraped
        // markup, so it must be escaped before it reaches the shell.
        $content = shell_exec("wget " . escapeshellarg($url) . " -q --load-cookies=./cookies.txt -O -");
        if (!is_string($content) || $content === "") {
            // wget failed or returned nothing: no replies and no cursor here.
            break;
        }

        // Undo the JSON/HTML escaping so the same patterns match both the
        // initial HTML page and the JSON-wrapped follow-up pages.
        $content = html_entity_decode(str_replace("\\n", "\n", $content));
        $content = str_replace("\\u003c", "<", $content);
        $content = str_replace("\\u003e", ">", $content);
        $content = str_replace("\\/", "/", $content);
        $content = str_replace("\\\"", "\"", $content);

        if (preg_match_all("|<a href=\"(/[^/]*/status/[0-9]*)\" class=\"tweet-timestamp js-permalink js-nav js-tooltip\"|U", $content, $reptweets)) {
            foreach ($reptweets[1] as $reptweet) {
                $reptweettokens = explode("/", $reptweet);
                $repusername = $reptweettokens[1];
                $reptweetid = $reptweettokens[count($reptweettokens) - 1];

                // The pattern allows an empty ID; there is nothing to crawl then.
                if ($reptweetid === "") {
                    continue;
                }

                // Strict comparison: IDs are long digit strings and must be
                // compared as strings, never as (lossy) numbers.
                if (!in_array($reptweetid, $replyingids, true)) {
                    array_push($replyingids, $reptweetid);
                    get_replying_ids($reptweetid, $repusername);
                }
            }
        }

        $maxposition = "";
        if (preg_match("|data-min-position=\"([^\"]*)\"|U", $content, $mp) || preg_match("|\"min_position\":\"([^\"]*)\"|U", $content, $mp)) {
            $maxposition = $mp[1];
        }

        if ($maxposition !== "" && isset($seenpositions[$maxposition])) {
            break;
        }
        $seenpositions[$maxposition] = true;
    } while ($maxposition !== "");
}
| + | ||
/**
 * Insert a tweet into the global $structure tree below the tweet it replies to.
 *
 * $structure is a nested map of the form id => array(child id => array(...)).
 * The tree is searched depth-first for $inreplyto; when found, $tweetid is
 * added as one of its children (with an empty child map of its own).
 *
 * @param string $tweetid   ID of the tweet to insert.
 * @param string $inreplyto ID of the tweet that $tweetid replies to.
 * @return bool True if the parent was found and the tweet inserted (or was
 *              already present), false otherwise.
 */
function add_to_structure ($tweetid, $inreplyto) {
    global $structure;

    if (!is_array($structure)) {
        return false;
    }

    // Recursive walker over the tree; each level is taken by reference so the
    // insertion lands in the global structure rather than in a copy.
    $insert = function (array &$node) use (&$insert, $tweetid, $inreplyto) {
        foreach ($node as $id => &$children) {
            // Numeric string keys are stored as integers by PHP, so compare
            // both sides as strings.
            if ((string) $id === (string) $inreplyto) {
                if (!is_array($children)) {
                    $children = array();
                }
                if (!isset($children[$tweetid])) {
                    $children[$tweetid] = array();
                }
                return true;
            }
            if (is_array($children) && $insert($children)) {
                return true;
            }
        }
        unset($children);

        return false;
    };

    return $insert($structure);
}
| + | ||
/**
 * Download every reply in the conversation of a tweet and store each as JSON.
 *
 * The reply IDs are gathered by get_replying_ids() into the global
 * $replyingids list. They are then passed, in comma-separated batches of up to
 * 100 IDs, to retrieve.tweet.list.py, whose output is read as one JSON document
 * per line. Every decoded tweet that has an id_str is written to
 * data/<tweetid>/reactions/<id_str>.json.
 *
 * @param string $tweetid  ID of the source tweet.
 * @param string $username Screen name of the source tweet's author.
 * @return void A summary line is echoed when the script was given an argument.
 */
function collect_replying_tweets ($tweetid, $username) {
    global $argv, $replyingids;
    $replycount = 0;

    $reactionsdir = "data/" . $tweetid . "/reactions/";
    if (!is_dir($reactionsdir)) {
        // Recursive, so a missing parent directory does not make this fail.
        @mkdir($reactionsdir, 0777, true);
    }
    @chmod($reactionsdir, 0777);
    get_replying_ids($tweetid, $username);

    $allids = is_array($replyingids) ? $replyingids : array();

    foreach (array_chunk($allids, 100) as $batch) {
        // The IDs come from scraped markup; escape them for the shell.
        $output = shell_exec("python retrieve.tweet.list.py " . escapeshellarg(implode(",", $batch)));
        if (!is_string($output)) {
            // The helper failed or printed nothing for this batch.
            continue;
        }

        foreach (explode("\n", $output) as $tweet) {
            $tweetobj = json_decode($tweet);
            if (isset($tweetobj->id_str)) {
                file_put_contents($reactionsdir . $tweetobj->id_str . ".json", $tweet);
                $replycount++;
            }
        }
    }

    if (isset($argv[1])) {
        echo $tweetid . " - source tweet and " . $replycount . " replies collected.\n";
    }
}
| + | ||
/**
 * Build the nested child map for a tweet from a parent => children index.
 *
 * @param string|int $id      ID of the tweet whose replies are wanted.
 * @param array      $parents Map of tweet ID => list of IDs replying to it.
 * @param array      $visited IDs already placed in the tree (by reference), so
 *                            that no tweet is inserted twice and malformed
 *                            data cannot cause endless recursion.
 * @return array Map of child ID => that child's own nested child map.
 */
function build_reply_subtree ($id, array $parents, array &$visited) {
    $subtree = array();

    if (!isset($parents[$id])) {
        return $subtree;
    }

    foreach ($parents[$id] as $cid) {
        if (isset($visited[$cid])) {
            continue;
        }
        $visited[$cid] = true;
        $subtree[$cid] = build_reply_subtree($cid, $parents, $visited);
    }

    return $subtree;
}

/**
 * Write data/<tweetid>/structure.json, the reply tree of a conversation.
 *
 * Every file in data/<tweetid>/reactions/ is decoded and indexed by the ID of
 * the tweet it replies to. The global $structure (id => nested child map) is
 * then filled in at every depth, so replies to replies are kept, and saved as
 * JSON.
 *
 * @param string $tweetid ID of the source tweet of the conversation.
 * @return void
 */
function create_structure($tweetid) {
    global $structure;

    if (!is_array($structure)) {
        $structure = array($tweetid => array());
    }

    $reactionsdir = "data/" . $tweetid . "/reactions/";
    $entries = is_dir($reactionsdir) ? scandir($reactionsdir) : false;
    if ($entries === false) {
        $entries = array();
    }

    // Index the collected replies by the tweet they reply to.
    $parents = array();
    foreach ($entries as $file) {
        $path = $reactionsdir . $file;
        if (!is_file($path)) {
            // Skips "." and ".." as well as any sub-directory.
            continue;
        }

        $tweet = json_decode((string) file_get_contents($path));
        // id_str is used rather than id so the ID never passes through a
        // numeric type; tweets that reply to nothing cannot be placed.
        if (!isset($tweet->id_str, $tweet->in_reply_to_status_id_str)) {
            continue;
        }

        $parents[$tweet->in_reply_to_status_id_str][] = $tweet->id_str;
    }

    $roots = array_keys($structure);
    $visited = array();
    foreach ($roots as $sid) {
        $visited[$sid] = true;
    }

    foreach ($roots as $sid) {
        if (!is_array($structure[$sid])) {
            $structure[$sid] = array();
        }
        foreach (build_reply_subtree($sid, $parents, $visited) as $cid => $subtree) {
            $structure[$sid][$cid] = $subtree;
        }
    }

    $structurefile = "data/" . $tweetid . "/structure.json";
    if (file_put_contents($structurefile, json_encode($structure)) !== false) {
        chmod($structurefile, 0777);
    }
}
| + | ||
// Entry point: php get.thread.php <tweet-id | tweet-url>
if (!isset($argv[1])) {
    exit(0);
}
$tweetid = trim($argv[1]);

// A tweet URL may be given instead of a bare ID: drop any query string or
// fragment and any trailing slash, then keep the last path segment.
if (strstr($tweetid, "/")) {
    $path = rtrim(preg_replace('/[?#].*$/', '', $tweetid), "/");
    $tokens = explode("/", $path);
    $tweetid = $tokens[count($tokens) - 1];
}

// The ID is used in shell commands and in file paths, so accept digits only.
if (!preg_match('/^[0-9]+$/D', $tweetid)) {
    file_put_contents("php://stderr", "Invalid tweet ID or URL: " . $argv[1] . "\n");
    exit(1);
}

$replyingids = array();
$structure = array($tweetid => array());

$sourcetweet = shell_exec("python retrieve.tweet.py " . escapeshellarg($tweetid));
$sourcetweetobj = is_string($sourcetweet) ? json_decode($sourcetweet) : null;
if (isset($sourcetweetobj->id_str) && isset($sourcetweetobj->user->screen_name)) {
    $username = $sourcetweetobj->user->screen_name;

    $tweetdir = "data/" . $tweetid;
    $sourcedir = $tweetdir . "/source-tweets/";
    if (!is_dir($sourcedir)) {
        // Recursive, so this also works when the 'data' directory itself does
        // not exist yet.
        @mkdir($sourcedir, 0777, true);
    }
    // NOTE(review): 0766 leaves these directories without the search bit for
    // group/others, unlike the 0777 used for 'reactions' - confirm intent.
    @chmod($tweetdir, 0766);
    @chmod($sourcedir, 0766);
    file_put_contents($sourcedir . $tweetid . ".json", $sourcetweet);

    collect_replying_tweets($tweetid, $username);

    create_structure($tweetid);
}
| +?> |
Oops, something went wrong.
0 comments on commit
797f0d4