# Importing Libraries

In [2]:
import os
import tweepy
import pandas as pd
import numpy as np
import time
import snscrape
import snscrape.modules.twitter as sntwitter
from datetime import datetime

# Using snscrape to collect tweets based on multiple parameters

In [3]:
# Search-query configuration for the scraper.
#
# Every list element is ONE complete keyword query: several parenthesised
# keyword groups joined with OR. The scraping cell appends the date-range,
# `near:<city>`, `within:` and `lang:` filters to each of these strings.
#
# NOTE(review): some groups contain a leading space inside the parentheses
# (e.g. "( hospital doctor)") and a few groups are repeated across lists.
# They are left exactly as written because editing them would change the
# queries that are sent — confirm whether that is intentional.

# List 1: "government hospital" / "govt hospital" complaint keywords, queries
# mentioning @PMOIndia, @MoHFW_INDIA, @narendramodi and #aiims, plus general
# medical-care complaint phrases.
search_words_list_1 = ["(government hospital ambulance) OR (government hospital corrupt) OR (government hospital doctor) OR (government hospital doctors refused) OR (government hospital postmortem) OR (government hospital dead) OR (government hospital death) OR (government hospital dead body) OR (government hospital negligence) OR (government hospital inhuman) OR (government hospital condition) OR (government hospital deplorable)",
                    "(govt hospital ambulance) OR (govt hospital corrupt) OR (govt hospital doctor) OR (govt hospital doctors refused) OR (govt hospital postmortem) OR (govt hospital dead) OR (govt hospital death) OR (govt hospital dead body) OR (govt hospital negligence) OR (govt hospital inhuman) OR (govt hospital condition) OR (govt hospital deplorable)",
                    "(government hospital @PMOIndia) OR (government hospital @MoHFW_INDIA) OR (government hospital @narendramodi) OR #aiims OR (government hospital #PMOIndia)",
                    "(medical negligence @PMOIndia) OR (medical care denied) OR (medicine expired) OR (oxygen lack) OR (treatment quality) OR (vaccines expire) OR (vaccines expired) OR (unhygienic conditions) OR (unhygienic hospital) OR (lack beds) OR (late doctor) OR (time doctor)",
                    "(hospital malpractice) OR (hospital patient death) OR (hospital no action) OR (government hospital refused treatment) OR (government hospital negligence) OR (government hospital die) OR (government no beds) OR (government no vacancy) OR (government hospital service) OR (government hospital inhuman) OR (hospital protest) OR (government hospital bad)"
                    ]

# List 2: generic "hospital" complaint keywords, followed by hospital /
# government-hospital complaints combined with @PMOIndia or @MoHFW_INDIA.
search_words_list_2 = ["(hospital poor) OR (hospital patient refused) OR (hospital no doctors) OR (hospital rude) OR (hospital no parking) OR (hospital dead) OR (hospital no medicines) OR (government no vacancy) OR (hospital service) OR (hospital inhuman) OR (hospital protest) OR (hospital bad)",
                    "(hospital ambulance) OR (hospital corrupt) OR ( hospital doctor) OR ( hospital doctors refused) OR (hospital postmortem) OR (hospital dead) OR (hospital death) OR (hospital dead body) OR (hospital negligence) OR (hospital inhuman) OR (hospital condition) OR (hospital deplorable)",
                    "(hospital malpractice @PMOIndia) OR (hospital patient death) OR (hospital no action @PMOIndia) OR (government hospital refused treatment @PMOIndia) OR (government hospital negligence @PMOIndia) OR (government hospital die) OR (government no beds) OR (government no vacancy @PMOIndia)",
                    "(government hospital service @PMOIndia) OR (government hospital inhuman) OR (hospital protest @PMOIndia) OR (government hospital bad @PMOIndia)",
                    "(hospital malpractice @MoHFW_INDIA) OR (hospital patient death @MoHFW_INDIA) OR (hospital no action @MoHFW_INDIA) OR (government hospital refused treatment @MoHFW_INDIA) OR (government hospital negligence @MoHFW_INDIA) OR (government hospital die @MoHFW_INDIA) OR (government no beds @MoHFW_INDIA)",
                    "(government no vacancy @MoHFW_INDIA) OR (government hospital service @MoHFW_INDIA) OR (government hospital inhuman @MoHFW_INDIA) OR (hospital protest @MoHFW_INDIA) OR (government hospital bad @MoHFW_INDIA)"
                    ]

# List 3: "hospital" complaint keywords where every group is combined with a
# mention of @MoHFW_INDIA or @PMOIndia.
search_words_list_3 = [ "(hospital poor @MoHFW_INDIA) OR (hospital patient refused @MoHFW_INDIA) OR (hospital no doctors @MoHFW_INDIA) OR (hospital rude @MoHFW_INDIA) OR (hospital no parking @MoHFW_INDIA) OR (hospital dead @MoHFW_INDIA)",
                    "(hospital no medicines @MoHFW_INDIA) OR (government no vacancy @MoHFW_INDIA) OR (hospital service @MoHFW_INDIA) OR (hospital inhuman @MoHFW_INDIA) OR (hospital protest @MoHFW_INDIA) OR (hospital bad @MoHFW_INDIA)",
                    "(hospital poor @PMOIndia) OR (hospital patient refused @PMOIndia) OR (hospital no doctors @PMOIndia) OR (hospital rude @PMOIndia) OR (hospital no parking @PMOIndia) OR (hospital dead @PMOIndia) OR (hospital no medicines @PMOIndia)",
                    "(government no vacancy @PMOIndia) OR (hospital service @PMOIndia) OR (hospital inhuman @PMOIndia) OR (hospital protest @PMOIndia) OR (hospital bad @PMOIndia)",
                    "(hospital ambulance @PMOIndia) OR (hospital corrupt @PMOIndia) OR (hospital doctor @PMOIndia) OR ( hospital doctors refused @PMOIndia) OR (hospital postmortem @PMOIndia) OR (hospital dead @PMOIndia)",
                    "(hospital death @PMOIndia) OR (hospital dead body @PMOIndia) OR (hospital negligence @PMOIndia) OR (hospital inhuman @PMOIndia) OR (hospital condition @PMOIndia) OR (hospital deplorable @PMOIndia)",
                    "(hospital ambulance @MoHFW_INDIA) OR (hospital corrupt @MoHFW_INDIA) OR ( hospital doctor @MoHFW_INDIA) OR ( hospital doctors refused @MoHFW_INDIA) OR (hospital postmortem @MoHFW_INDIA) OR (hospital dead @MoHFW_INDIA)",
                    "(hospital death @MoHFW_INDIA) OR (hospital dead body @MoHFW_INDIA) OR (hospital negligence @MoHFW_INDIA) OR (hospital inhuman @MoHFW_INDIA) OR (hospital condition @MoHFW_INDIA) OR (hospital deplorable @MoHFW_INDIA)"
                    ]

# City names substituted into the `near:` search operator; each scraped row is
# also labelled with the city whose query returned it.
cities = ["Mumbai", "Delhi", "Kolkata", "Chennai", "Bangalore", "Hyderabad"]
                    

In [4]:
# Scrape tweets for every (city, keyword query) pair and load them into a
# DataFrame.
#
# Inputs (defined in earlier cells): `cities`, `search_words_list_1`,
# `search_words_list_2`, `search_words_list_3`, and the `sntwitter` module.
# Output: `tweets_df` with the columns date_created, tweet and city, one row
# per item yielded by the scraper.

# Filters appended to every keyword query. Kept in one place so the date
# window, search radius and language are not repeated per loop.
SEARCH_SINCE = '2018-01-01'
SEARCH_UNTIL = '2023-01-01'
SEARCH_RADIUS = '80km'
SEARCH_LANG = 'en'
QUERY_TEMPLATE = '{words} since:{since} until:{until} near:{city} within:{radius} lang:{lang}'

# For each city the lists are searched in the order 1, 2, 3; concatenating
# them keeps that order while replacing three copy-pasted loops with one.
all_search_words = search_words_list_1 + search_words_list_2 + search_words_list_3

# Creating list to append tweet data to
attributes_container = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for city in cities:
    for words in all_search_words:
        query = QUERY_TEMPLATE.format(
            words=words,
            since=SEARCH_SINCE,
            until=SEARCH_UNTIL,
            city=city,
            radius=SEARCH_RADIUS,
            lang=SEARCH_LANG,
        )
        # NOTE(review): no de-duplication is done here; if overlapping queries
        # return the same tweet it is appended once per matching query.
        for tweet in sntwitter.TwitterSearchScraper(query).get_items():
            attributes_container.append([tweet.date, tweet.content, city])

# Creating a dataframe to load the list
tweets_df = pd.DataFrame(attributes_container, columns=["date_created", "tweet", "city"])

# Checking the DataFrame and Converting It to a CSV File

In [5]:
# Preview the first and the last rows of the scraped tweets.
for label, preview in (
    ('tweets_df.head() = \n', tweets_df.head()),
    ('tweets_df.tail() = \n', tweets_df.tail()),
):
    print(label, preview)

Unnamed: 0,date_created,tweet,city
0,2021-12-18 06:53:25+00:00,Thanekars are being looted like this by corrup...,Mumbai
1,2021-12-16 03:36:02+00:00,@CMOMaharashtra government hospital is getting...,Mumbai
2,2021-12-03 14:00:01+00:00,Reality of healthcare system in Mumbai Maharas...,Mumbai
3,2021-12-03 13:55:35+00:00,Reality of BMC run hospital Nair hospital in M...,Mumbai
4,2021-11-29 06:04:54+00:00,"We at VIDYA, in partnership with CAPITA, donat...",Mumbai


In [6]:
# Report the overall size of the frame and how many tweets each city produced.
frame_shape = tweets_df.shape
tweets_per_city = tweets_df['city'].value_counts()
print("tweets_df.shape = ", frame_shape)
print("Value count of tweets from each city = ", tweets_per_city)

Unnamed: 0,date_created,tweet,city
26966,2020-01-26 17:47:36+00:00,@Just_im_Ajay @KTRTRS @KTRoffice @KTR_News @Et...,Hyderabad
26967,2019-10-27 06:31:59+00:00,"@KTRTRS Sir,\nLot's to be developed in Govt. H...",Hyderabad
26968,2019-07-30 05:59:08+00:00,@draghafur @TheUrgentNeed @drharshvardhan @MoH...,Hyderabad
26969,2019-03-07 18:20:35+00:00,@priyakamal @RTHyderabad May be Untrained Medi...,Hyderabad
26970,2019-03-04 12:42:12+00:00,@TelanganaCMO \n@KTRTRS \n@WHO \n@MoHFW_INDIA ...,Hyderabad


In [7]:
# Write the scraped tweets to a CSV file.
#
# The destination defaults to the original absolute path, so behaviour is
# unchanged when nothing is configured. Set the TWEETS_CSV_PATH environment
# variable to write elsewhere (e.g. when running on another machine).
#
# `to_csv` is called with its defaults, so the DataFrame index is written as
# the first (unnamed) column of the file.
# NOTE(review): the file name says 2017_2022 while the scrape query uses
# since:2018-01-01 until:2023-01-01 — confirm which range is intended.
output_csv_path = os.environ.get(
    'TWEETS_CSV_PATH',
    '/Users/nitanshjain/Documents/Projects/Twitter_Data_Analysis/data/tweets_snscrape_2017_2022.csv',
)
tweets_df.to_csv(output_csv_path)

(26971, 3)