# Imports

In [1]:
import pickle
import os
import datetime
import pymongo
import pandas as pd
import numpy as np
from subprocess import call
from tqdm import tqdm
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Bash Commands

In [2]:
df = pd.read_csv("./india_info.csv")
india_info = df[["state", "city"]]
bash_command_list = []

In [3]:
for i, row in india_info.iterrows():
    state, city = row["state"], row["city"]
    temp = "twint -s 'Unlock1 AND {}' -o 'output/{}--{}.csv' --csv".format(
        city, state, city
    )
    bash_command_list.append(temp)

In [5]:
file_temp = open("bash_command_list.txt", "w")
file_temp.writelines(bash_command_list)
file_temp.close()

In [4]:
for bash_command in bash_command_list:
    call(bash_command, shell=True)

# Scrapped Data

In [5]:
scrapped_data = os.listdir("./output/")

In [6]:
output_df = pd.DataFrame()

for file in scrapped_data:
    file_df = pd.read_csv("output/{}".format(file))

    temp = file.split(".")[0].split("--")
    state, city = temp[0], temp[1]

    file_df["state"] = state
    file_df["city"] = city

    output_df = output_df.append(file_df)

output_df = output_df.reset_index()

df_to_work_with = output_df[["tweet", "link", "date", "time", "state", "city"]]

In [11]:
df_to_work_with.info

<bound method DataFrame.info of                                                   tweet  \
0     No. of Containment Zones in Vijayawada (VMC li...   
1     'Unlock 1.0' evokes huge response #Covid-19 #U...   
2     Are people hesitant to venture out? Mall in Vi...   
3     Minister Vellampalli Srinivas inspects arrange...   
4     In Pics: With relaxations given to restaurants...   
...                                                 ...   
6778  स्वास्थ्य सेवाकर्मियों के साथ भेदभाव न करें। त...   
6779  #IndiaFightsCorona\n\nआपस में उचित दूरी रखते ह...   
6780  #IndiaFightsCorona\n\nखुले में थूकना स्वास्थ्य...   
6781  30 जून तक बढ़ा लॉकडाउन, साथ मे शर्त के साथ ये ...   
6782  City Center Korba is open from 10 am to 9 pm w...   

                                                   link        date      time  \
0     https://twitter.com/tharunboda/status/12703037...  2020-06-09  16:06:29   
1     https://twitter.com/TheHansIndiaWeb/status/127...  2020-06-09  02:59:26   
2     https://tw

# Classification

In [8]:
model = load_model("./model/model_v6.h5")
tokenizer = pickle.load(open("./model/tokenizer_v6.pickle", "rb"))
score_list = ["anger", "happiness", "neutral", "sadness", "worry"]

In [9]:
def analyze(tweet):
    sequences = tokenizer.texts_to_sequences([row["tweet"]])
    new_processed = pad_sequences(sequences, padding="post", maxlen=30)
    return model.predict(new_processed)

In [12]:
output = pd.DataFrame()

for index, row in tqdm(
    df_to_work_with[["date", "time", "tweet",
                     "link", "state", "city"]].iterrows()
):

    #if index % 20 == 0:
        #print("At Index: {}".format(index))

    date = row["date"]
    time = row["time"]

    date_time = date + " " + time
    date_time = datetime.datetime.strptime(date_time, "%Y-%m-%d %H:%M:%S")

    tweet = row["tweet"]
    link = row["link"]
    state = row["state"]
    city = row["city"]

    score = analyze(tweet)

    anger = score[0][0]
    happiness = score[0][1]
    neutral = score[0][2]
    sadness = score[0][3]
    worry = score[0][4]

    result = {
        "date_time": date_time,
        "tweet": tweet,
        "link": link,
        "state": state,
        "city": city,
        "anger": anger,
        "happiness": happiness,
        "neutral": neutral,
        "sadness": sadness,
        "worry": worry,
    }

    output = output.append(result, ignore_index=True)

output[
    [
        "date_time",
        "tweet",
        "link",
        "state",
        "city",
        "anger",
        "happiness",
        "neutral",
        "sadness",
        "worry",
    ]
].to_csv("{}-backup.csv".format(datetime.date.today()))

6783it [07:42, 14.67it/s]


# Sentiments

In [13]:
mongo_url = '#'
myclient = pymongo.MongoClient(mongo_url)
mydb = myclient["covidian"]

mycol = mydb["sentiments"]
df_for_sentiments = output[
    [
        "date_time",
        "tweet",
        "link",
        "state",
        "city",
        "anger",
        "happiness",
        "neutral",
        "sadness",
        "worry",
    ]
]

for i, row in df_for_sentiments.iterrows():
    mycol.insert_one(dict(row))

# Sentiments City

In [14]:
mycol = mydb["sentiments_city"]
df_for_sentiments_city = (
    output[
        [
            "date_time",
            "tweet",
            "link",
            "state",
            "city",
            "anger",
            "happiness",
            "neutral",
            "sadness",
            "worry",
        ]
    ]
    .groupby(["state", "city"], as_index=False)
    .mean()
)

for i, row in df_for_sentiments_city.iterrows():
    mycol.insert_one(dict(row))

# Sentiments State

In [15]:
mycol = mydb["sentiments_state"]
df_for_sentiments_state = (
    output[
        [
            "date_time",
            "tweet",
            "link",
            "state",
            "city",
            "anger",
            "happiness",
            "neutral",
            "sadness",
            "worry",
        ]
    ]
    .groupby(["state"], as_index=False)
    .mean()
)
for i, row in df_for_sentiments_state.iterrows():
    mycol.insert_one(dict(row))

# Sentiment Country

In [16]:
mycol = mydb["sentiments_country"]
df_for_sentiments_country = output[
    [
        "date_time",
        "tweet",
        "link",
        "state",
        "city",
        "anger",
        "happiness",
        "neutral",
        "sadness",
        "worry",
    ]
].mean()
sentiments_country = dict()
sentiments_country["country"] = "India"
sentiments_country.update(df_for_sentiments_country.to_dict())
mycol.insert_one(sentiments_country)

<pymongo.results.InsertOneResult at 0x6d307d280>