# Ranking Algorithm

# Installs and imports

## Install all required libraries

In [None]:
# Optional one-time setup: uncomment the line below to install the
# dependencies listed in ../requirements.txt (-q keeps pip's output quiet).
# !pip3 install -r ../requirements.txt -q

## Import required libraries

In [2]:
import re
import os
from nltk.corpus import stopwords
import pandas as pd
import pickle
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
import datetime as dt

## Importing dataset

In [3]:
# Load the per-tweet predictions table that the rest of the notebook ranks.
predictions_csv = './data/Final_Predictions.csv'
df = pd.read_csv(predictions_csv)

In [4]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,mp,tweet_date,tweet_time,tweet_text,cleaned_tweet,Snowball_Stem,NB_predictions,LSTM_predictions,BLSTM_predictions,SVM_predictions,Ensemble_predictions
0,0,rsprasad,2021-03-31,23:30:00,"Please, don't break the trust.\n\n@narendramod...",please break trust,pleas break trust,0,1,0,1,1
1,1,rsprasad,2021-03-31,23:30:00,"MAINTENANCE OF WIVES, CHILDREN AND PARENTS - h...",maintenance wives children parents l c h r read,mainten wive children parent l c h r read,0,0,0,1,0
2,2,rsprasad,2021-03-31,23:30:00,NIA look into SSR case \n\n@AmitShah @narendra...,nia look ssr case,nia look ssr case,1,1,1,1,1
3,3,rsprasad,2021-03-31,23:30:00,@PavanjitMane1 @PMOIndia @CMOMaharashtra @rspr...,new pan allotted per aadhar many pans issued b...,new pan allot per aadhar mani pan issu bef aad...,1,1,1,1,1
4,4,rsprasad,2021-03-31,23:30:00,@rsprasad @narendramodi @PMOIndia @examwarrior...,one worst govt till interest reduced income ta...,one worst govt till interest reduc incom tax c...,0,0,0,0,0


## Making bins for each MP

In [5]:
# Distinct MP handles, kept in the order they first occur in the data.
mp_list = pd.unique(df['mp'])
mp_list

array(['rsprasad', 'myogiadityanath', 'OfficeofUT', 'rahulgandhi',
       'CMOTamilNadu', 'ArvindKejriwal', 'smritiirani', 'PiyushGoyalOffc',
       'AmitShah', 'nsitharaman', 'narendramodi', 'rajnathsingh',
       'Dev_Fadnavis', 'mamataofficial', 'NitishKumar', 'nitin_gadkari',
       'DrSJaishankar'], dtype=object)

In [6]:
# Map each MP handle to its "bin": the number of days between that MP's
# earliest and latest tweet_date (0 means every tweet falls on one day).
# Dates are parsed once up front so each MP needs only a single filter, and
# missing dates are skipped by max()/min() instead of raising a TypeError.
tweet_dates = pd.to_datetime(df['tweet_date'], format='%Y-%m-%d')

date_bins = dict()
for mp in mp_list:
    mp_dates = tweet_dates[df['mp'] == mp]
    # mp_list holds unique handles, so every key is written exactly once.
    date_bins[mp] = (mp_dates.max() - mp_dates.min()).days

In [7]:
# Inspect the MP -> day-span mapping.
date_bins

{'rsprasad': 1,
 'myogiadityanath': 1,
 'OfficeofUT': 1,
 'rahulgandhi': 0,
 'CMOTamilNadu': 2,
 'ArvindKejriwal': 1,
 'smritiirani': 2,
 'PiyushGoyalOffc': 1,
 'AmitShah': 0,
 'nsitharaman': 1,
 'narendramodi': 0,
 'rajnathsingh': 2,
 'Dev_Fadnavis': 1,
 'mamataofficial': 1,
 'NitishKumar': 4,
 'nitin_gadkari': 2,
 'DrSJaishankar': 1}

In [8]:
# Start from an empty frame; the ranking columns are added in the cells below.
rank_df = pd.DataFrame()

In [9]:
# One row per unique MP handle, in mp_list order.
rank_df['mp'] = mp_list

In [10]:
# Look each bin up by MP handle rather than relying on the dict's insertion
# order lining up with the rows; a handle missing from date_bins raises a
# KeyError instead of silently misaligning the column.
rank_df['bin'] = [date_bins[mp] for mp in rank_df['mp']]

In [11]:
# Show the frame so far: one row per MP with its day-span bin.
rank_df

Unnamed: 0,mp,bin
0,rsprasad,1
1,myogiadityanath,1
2,OfficeofUT,1
3,rahulgandhi,0
4,CMOTamilNadu,2
5,ArvindKejriwal,1
6,smritiirani,2
7,PiyushGoyalOffc,1
8,AmitShah,0
9,nsitharaman,1


## Compute positive and negative tweet percentage

In [12]:
# For every MP (in mp_list order), compute the percentage of that MP's tweets
# whose Ensemble_predictions label is 1. The result is stored later as
# Positive_Percentage, so label 1 is treated as "positive".
percentage = []
for politician in mp_list:
    mp_df = df[df.mp == politician]
    sentiment_values = mp_df.Ensemble_predictions.value_counts()
    # .get(1, 0) avoids a KeyError when an MP has no tweets labelled 1.
    positive_count = sentiment_values.get(1, 0)
    percentage.append((positive_count / sentiment_values.sum()) * 100)

In [13]:
# percentage was built by iterating mp_list, the same order used for the 'mp'
# column, so a positional assignment lines up row by row.
rank_df['Positive_Percentage'] = percentage

In [14]:
# The negative share is the complement of the positive share (out of 100).
positive_share = rank_df['Positive_Percentage']
rank_df['Negative_Percentage'] = 100 - positive_share

In [15]:
# Show the frame with both percentage columns added.
rank_df

Unnamed: 0,mp,bin,Positive_Percentage,Negative_Percentage
0,rsprasad,1,48.688768,51.311232
1,myogiadityanath,1,61.427863,38.572137
2,OfficeofUT,1,47.523762,52.476238
3,rahulgandhi,0,63.309353,36.690647
4,CMOTamilNadu,2,55.643958,44.356042
5,ArvindKejriwal,1,58.959254,41.040746
6,smritiirani,2,59.253499,40.746501
7,PiyushGoyalOffc,1,40.781853,59.218147
8,AmitShah,0,59.381044,40.618956
9,nsitharaman,1,39.228139,60.771861


In [16]:
# Order the rows by ascending day-span bin.
rank_df = rank_df.sort_values(by='bin', ascending=True)

In [17]:
# Final ranking order: ascending bin, and within each bin the MP with the
# highest Positive_Percentage first. A single two-key sort gives the same
# order as sorting each bin separately, without growing a frame via
# pd.concat inside a loop.
new_df = rank_df.sort_values(
    ['bin', 'Positive_Percentage'],
    ascending=[True, False],
)

In [18]:
# Rebind rank_df to the re-ordered frame so later cells use the ranked rows.
rank_df = new_df

In [19]:
# Show the ranked frame (the original row labels are kept, so they appear out of order).
rank_df

Unnamed: 0,mp,bin,Positive_Percentage,Negative_Percentage
3,rahulgandhi,0,63.309353,36.690647
8,AmitShah,0,59.381044,40.618956
10,narendramodi,0,57.757758,42.242242
1,myogiadityanath,1,61.427863,38.572137
12,Dev_Fadnavis,1,60.528893,39.471107
5,ArvindKejriwal,1,58.959254,41.040746
16,DrSJaishankar,1,57.254519,42.745481
13,mamataofficial,1,56.006006,43.993994
0,rsprasad,1,48.688768,51.311232
2,OfficeofUT,1,47.523762,52.476238


# Write to csv

In [20]:
# Persist the ranking; index=False leaves the row labels out of the file.
rank_csv = './data/rank.csv'
rank_df.to_csv(rank_csv, index=False)