**Importing the important libraries required for the task**

In [0]:
import json
import os

import pandas as pd# for using the DataFrame for storing data in tabular format
import requests# to send a get request to the twitter api for getting the data
from requests_oauthlib import OAuth1# for creating an OAuth Client connection

**Entering the token values for accessing the Twitter API and creating an OAuth client connection**

In [0]:
# Credentials are read from environment variables so that real tokens never have
# to be typed into (and saved with) the notebook. An unset variable falls back to
# the empty placeholder string.
auth_params = {
    'app_key': os.environ.get('TWITTER_APP_KEY', ''),
    'app_secret': os.environ.get('TWITTER_APP_SECRET', ''),
    'oauth_token': os.environ.get('TWITTER_OAUTH_TOKEN', ''),
    'oauth_token_secret': os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '')
}

# Creating an OAuth Client connection; the four values are passed positionally
# in the order app key, app secret, OAuth token, OAuth token secret.
auth = OAuth1 (
    auth_params['app_key'],
    auth_params['app_secret'],
    auth_params['oauth_token'],
    auth_params['oauth_token_secret']
)

**Sending a GET request to the URL**

In [0]:
# Search endpoint URL according to the Twitter API
url_rest = "https://api.twitter.com/1.1/search/tweets.json"

# Getting rid of retweets in the extraction results and filtering out all replies,
# which are often unnecessary for the analysis.
# NOTE(review): the leading '%' is sent as part of the search text - confirm it is intentional.
q = '%@midasIIITD -filter:retweets -filter:replies'

# Getting the data from the API
params = {'q': q, 'lang': 'en',  'result_type': 'recent'}
# A timeout keeps the cell from hanging forever if the API does not respond.
results = requests.get(url_rest, params=params, auth=auth, timeout=30)  # a Response object
# Fail here with a clear HTTP error (e.g. bad credentials) instead of with a
# KeyError on 'statuses' in a later cell.
results.raise_for_status()

**Converting the response object into JSON**

In [0]:
# Decode the response body into a Python dictionary. The name `results` is rebound
# from the Response object to the decoded payload, so only decode when it has not
# been decoded yet; this keeps the cell safe to re-run.
if not isinstance(results, dict):
  results = results.json()

In [0]:
results  # display the complete decoded response

In [0]:
results.keys()  # top-level keys of the response dictionary

dict_keys(['statuses', 'search_metadata'])

In [0]:
results['statuses']  # the list of returned tweets, one dictionary per tweet

In [0]:
results['statuses'][0].keys()  # field names available on the first tweet

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'possibly_sensitive', 'lang'])

**Extracting the Date and Time from a single String**

In [0]:
# Raw timestamp string of the first tweet; date and time are sliced out of it below.
results['statuses'][0]['created_at']

'Mon Apr 08 07:08:12 +0000 2019'

**Extracting the date**

In [0]:
# The timestamp's first 11 characters hold the weekday, month and day; its last 4
# hold the year. Joining the two pieces drops the time-of-day in between.
created_at = results['statuses'][0]['created_at']
date = created_at[:11] + created_at[-4:]
date

'Mon Apr 08 2019'

**Extracting the time**

In [0]:
# Everything between the leading date part and the trailing year is the
# time-of-day together with its UTC offset.
first_created_at = results['statuses'][0]['created_at']
time = first_created_at[11:-5]
time

'07:08:12 +0000'

**Viewing the text of a tweet**

In [0]:
# Text of the first tweet in the result list
results['statuses'][0]['text']

'Many Congratulations to @midasIIITD student, Shagun Uppal @shagunuppls, on getting selected for the summer internsh… https://t.co/bzhiSm4zuB'

In [0]:
# Print the number of likes of every tweet, one per line
for tweet in results['statuses']:
  print(tweet['favorite_count'])

13
0
1
5
4
6
7
11
8
5
7


In [0]:
# Print the number of retweets of every tweet, one per line
for tweet in results['statuses']:
  print(tweet['retweet_count'])

2
0
1
2
1
1
1
1
1
1
2


**Saving The Data into jsonl file**

In [0]:
# JSON Lines layout: every tweet is serialised as one JSON document on its own line.
with open('data.jsonl', 'w') as jsonl_file:
  for tweet in results['statuses']:
    jsonl_file.write(json.dumps(tweet))
    jsonl_file.write('\n')

**Reading the jsonl file**

In [15]:
# Iterating over the file yields one line per tweet; each line is still a JSON
# string (trailing newline included), not yet a dictionary.
with open('data.jsonl', 'r') as jsonl_file:
  contents = list(jsonl_file)

print(contents)  # list of strings

13


**Converting the data into Tabular Format using pandas DataFrame**

We use `json.loads` to parse each JSON string back into a Python dictionary.

In [0]:
def _tweet_to_row(tweet):
  """Flatten one tweet dictionary into a single table row.

  Returns a list [text, date, time, likes, retweets, images], where `images` is
  the number of attached media entries of type 'photo', or None when there are none.
  """
  created_at = tweet['created_at']
  date = created_at[:11] + created_at[-4:]  # leading date part + trailing year
  time = created_at[11:-5]  # time-of-day with its UTC offset
  # 'media' is only present in 'entities' when something is attached to the tweet
  media = tweet['entities'].get('media', [])
  photo_count = sum(1 for item in media if item['type'] == 'photo')
  return [
      tweet['text'],
      date,
      time,
      tweet['favorite_count'],  # number of likes
      tweet['retweet_count'],
      photo_count if photo_count > 0 else None,
  ]


# Each line is parsed exactly once; blank lines (e.g. a trailing newline added by
# an editor) are skipped instead of crashing json.loads.
all_rows = [_tweet_to_row(json.loads(line)) for line in contents if line.strip()]

# Converting the list of data rows into a DataFrame
data = pd.DataFrame(all_rows, columns=['tweet_text', 'Date', 'Time', 'Likes', 'Retweets', 'images'])

In [17]:
data  # display the assembled DataFrame (one row per tweet)

Unnamed: 0,tweet_text,Date,Time,Likes,Retweets,images
0,We will close the submission portal for submit...,Wed Apr 10 2019,16:47:25 +0000,2,0,
1,Clarification: Our earlier post which indicate...,Wed Apr 10 2019,09:01:29 +0000,1,0,
2,"Many Congratulations to @midasIIITD student, S...",Mon Apr 08 2019,07:08:12 +0000,18,2,
3,My twitter developer account is taking time to...,Sun Apr 07 2019,13:45:58 +0000,0,0,
4,We request all students whose interview are sc...,Sun Apr 07 2019,11:43:24 +0000,1,1,
5,"Other queries: ""none of the Tweeter Apis give ...",Sun Apr 07 2019,06:55:19 +0000,5,2,
6,"Other queries: ""do we have to make two differe...",Sun Apr 07 2019,06:53:38 +0000,5,1,
7,"Other queries: ""If using Twitter api, it does ...",Sun Apr 07 2019,05:32:27 +0000,6,1,
8,Response to some queries asked by students on ...,Sun Apr 07 2019,05:29:40 +0000,7,1,
9,We have emailed the task details to all candid...,Fri Apr 05 2019,16:08:37 +0000,11,1,
