# PYTHON PROBLEM

## Importing Libraries

In [50]:
import jsonlines
import datetime
import json
import requests
import numpy as np
import os
import pandas as pd
from requests_oauthlib import OAuth1

# PART - A


## Setting up Twitter API

In [41]:
# Credentials are issued on the Twitter developers page. They are read from
# environment variables so that secrets are never stored in the notebook itself;
# a missing variable falls back to an empty string.
api_creds = {
    'consumer_key': os.environ.get('TWITTER_CONSUMER_KEY', ''),
    'consumer_secret': os.environ.get('TWITTER_CONSUMER_SECRET', ''),
    'access_token': os.environ.get('TWITTER_ACCESS_TOKEN', ''),
    'access_token_secret': os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')
}

# OAuth1 authentication object that is passed along with every Twitter API request.
auth = OAuth1(api_creds['consumer_key'], api_creds['consumer_secret'],
              api_creds['access_token'], api_creds['access_token_secret'])

## Fetching tweets

In [42]:
def getUserTweets(screen_name, authorization, fetchAttempts):
    """Fetch the most recent tweets of a user, newest first.

    Issues up to ``fetchAttempts`` requests of at most 200 tweets each against
    the ``statuses/user_timeline`` endpoint. Each request after the first asks
    only for tweets older than the oldest one already received (``max_id``), so
    pages do not overlap and no tweet is returned twice.

    Parameters
    ----------
    screen_name : str
        Twitter handle whose timeline is fetched.
    authorization
        Authentication object passed to ``requests.get`` as ``auth``.
    fetchAttempts : int
        Maximum number of pages (requests) to fetch.

    Returns
    -------
    list
        The decoded tweet objects of all fetched pages, concatenated.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # the url to fetch user_timeline tweets (available on twitter developers webpage)
    api_url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {
        'screen_name': screen_name,
        'count': 200,
        'trim_user': 'true',
        'tweet_mode': 'extended',
    }

    tweets = []
    for _ in range(fetchAttempts):
        response = requests.get(api_url, params=params, auth=authorization)
        # Fail loudly on an error status instead of mixing an error payload into the result.
        response.raise_for_status()
        page = response.json()
        if not page:
            # The timeline is exhausted; further requests would return nothing new.
            break
        tweets.extend(page)
        # max_id is inclusive, so subtract one to exclude the oldest tweet already seen.
        params['max_id'] = min(t['id'] for t in page) - 1
    return tweets


In [43]:
# Account whose timeline is downloaded; four requests are made to the API.
handle_name = 'midasiiitd'
tweets = getUserTweets(screen_name=handle_name, authorization=auth, fetchAttempts=4)
print(len(tweets))

800


## Dumping all tweets into JSONlines file

In [44]:
def saveJsonline(tweetList, filename):
    """Write every item of ``tweetList`` to ``filename`` using ``jsonlines.Writer``.

    Parameters
    ----------
    tweetList : iterable
        Objects to write (here: the tweet dictionaries returned by the API).
    filename : str
        Path of the output file; it is opened in write mode with UTF-8 encoding,
        so an existing file is overwritten.
    """
    with open(filename, 'w', encoding="utf-8") as file:
        # The writer is used as a context manager so that it is actually closed
        # once all items have been written.
        with jsonlines.Writer(file) as book:
            book.write_all(tweetList)
        
# Save the fetched tweets to the jsonl file named 'tweets.jsonl'.
saveJsonline(tweetList=tweets, filename='tweets.jsonl')

# PART - B

## Parsing JSONlines file to load tweets

In [45]:
# Read every record of 'tweets.jsonl' back into a list.
with jsonlines.open('tweets.jsonl') as reader:
    loadedTweets = list(reader)

In [46]:
# Print how many tweets were read back from the file.
print(len(loadedTweets))

# Show the first loaded tweet; as the last expression of the cell it is
# rendered as the cell's output.
loadedTweets[0]

800


{'created_at': 'Mon Apr 08 07:08:12 +0000 2019',
 'id': 1115149324533542912,
 'id_str': '1115149324533542912',
 'full_text': 'Many Congratulations to @midasIIITD student, Shagun Uppal @shagunuppls, on getting selected for the summer internship in the BRAIN lab at Singapore University of Technology &amp; Design @sutdsg, Singapore.\nWe wish her the best luck for the internship.\n\n#MIDAS #StudentAchievement https://t.co/snX2GkzvQg',
 'truncated': False,
 'display_text_range': [0, 279],
 'entities': {'hashtags': [{'text': 'MIDAS', 'indices': [253, 259]},
   {'text': 'StudentAchievement', 'indices': [260, 279]}],
  'symbols': [],
  'user_mentions': [{'screen_name': 'midasIIITD',
    'name': 'MIDAS IIITD',
    'id': 1021355762575073281,
    'id_str': '1021355762575073281',
    'indices': [24, 35]},
   {'screen_name': 'shagunuppls',
    'name': 'shagun uppal',
    'id': 1399417358,
    'id_str': '1399417358',
    'indices': [58, 70]},
   {'screen_name': 'sutdsg',
    'name': 'SUTD Singapore'


## Formatting each tweet to show the following details
● The text of the tweet.

● Date and time of the tweet.

● The number of favorites/likes.

● The number of retweets.

● The number of images present in the tweet (`None` if the tweet has no images).

In [47]:
def tweetInfo(tweet):
    """Reduce a single tweet object to the fields shown in the tweets table.

    Parameters
    ----------
    tweet : dict
        Tweet object with the keys ``created_at``, ``full_text``,
        ``favorite_count`` and ``retweet_count``; ``extended_entities`` is
        optional.

    Returns
    -------
    list
        ``[timestamp, text, likes, retweets, image_count]`` where ``timestamp``
        is a naive ``datetime`` expressed in UTC and ``image_count`` is the
        number of attached photos, or ``None`` if the tweet has no photo.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``tweet``.
    ValueError
        If ``created_at`` does not match the expected date format.
    """
    # created_at looks like 'Mon Apr 08 07:08:12 +0000 2019'. The UTC offset is
    # parsed rather than cut out of the string, then the value is normalised to
    # UTC and returned without tzinfo.
    created_at = datetime.datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y")
    timestamp = created_at.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    # Attached media is listed under extended_entities -> media; only entries
    # of type 'photo' are counted as images.
    media = tweet.get('extended_entities', {}).get('media', [])
    photo_count = sum(1 for m in media if m['type'] == 'photo')

    return [
        timestamp,
        tweet['full_text'],
        tweet['favorite_count'],
        tweet['retweet_count'],
        photo_count if photo_count else None,
    ]

## Displaying the tweets table

In [51]:
def TweetsTable(tweets):
    """Build a DataFrame with one row per tweet.

    Each tweet is converted with ``tweetInfo`` and the results are placed in the
    columns 'Timestamp', 'Tweets', 'Likes', 'Retweets' and 'Image Count'.

    As a side effect the pandas option ``display.max_colwidth`` is set to
    ``None`` so that the full tweet text is shown when the table is displayed.

    Parameters
    ----------
    tweets : iterable of dict
        Tweet objects accepted by ``tweetInfo``.

    Returns
    -------
    pandas.DataFrame
        The table of formatted tweets.
    """
    rows = [tweetInfo(t) for t in tweets]
    df = pd.DataFrame(rows, columns=['Timestamp', 'Tweets', 'Likes', 'Retweets', 'Image Count'])
    # None removes the column-width limit; the former value -1 is deprecated
    # since pandas 1.0.
    pd.set_option('display.max_colwidth', None)
    return df

# Build the tweets table, export it to 'tweets.csv' without the index column
# and show it in the notebook.
table = TweetsTable(tweets=loadedTweets)
table.to_csv("tweets.csv", index=False)
display(table)

Unnamed: 0,Timestamp,Tweets,Likes,Retweets,Image Count
0,2019-04-08 07:08:12,"Many Congratulations to @midasIIITD student, Shagun Uppal @shagunuppls, on getting selected for the summer internship in the BRAIN lab at Singapore University of Technology &amp; Design @sutdsg, Singapore.\nWe wish her the best luck for the internship.\n\n#MIDAS #StudentAchievement https://t.co/snX2GkzvQg",13,2,1.0
1,2019-04-08 03:27:42,@midasIIITD thanks all students who have appeared for the interview yesterday. We will announce the interview results for MIDAS internship latest by 14th April 2019. Sorry for the delay since we are carefully evaluating all applications that we have received for the internship. https://t.co/FANz5vImwU,4,0,1.0
2,2019-04-07 14:17:29,"@himanchalchandr Meanwhile, complete CV/NLP task first.",0,0,
3,2019-04-07 14:17:09,@sayangdipto123 Submit as per the guideline again.,0,0,
4,2019-04-07 11:43:24,We request all students whose interview are scheduled today to join at the time given to them and not before or after. We have other interviews scheduled which will be affected. \nThanks much.,1,1,
5,2019-04-07 06:55:19,"Other queries: ""none of the Tweeter Apis give the correct count of favorites tested for most of them, all give the wrong count. same is true for retweet. this mostly happens if the no. of likes, retweet is very large. So, what shld be done?""\nAns: Just use the count given by API.",5,2,
6,2019-04-07 06:53:38,"Other queries: ""do we have to make two different repositories on Github as ""Python Problem"" and ""CV Problem"". Or we have to make a single repository named ""root"" and upload our code in that?""\nAnd: Have one repository of any name which has two folders. One python and other cv/nlp",4,1,
7,2019-04-07 05:32:27,"Other queries: ""If using Twitter api, it does not include any information about images in the response objects sent, so how do we retrieve the image info as requested?""\nAns: U have to use twitter api or wrappers based on it like tweepy.\nTwitter API does provide image information.",6,1,
8,2019-04-07 05:29:40,"Response to some queries asked by students on @midasIIITD task.\nWhat does the line ""dump the responses into JSONlines file"" mean?\nIt means you have to dump the response into a jsonlines file. jsonlines is a file extension more popularly know as .jsonl\n#MIDAS #Research #Internship",7,1,
9,2019-04-06 17:11:29,RT @kdnuggets: Top 8 #Free Must-Read #Books on #DeepLearning #KDN https://t.co/1DtlN91Yjj,0,2,
