In [23]:
import pandas as pd
from textblob import TextBlob

# load data

In [24]:
df = pd.read_csv('tweets.csv', header=0)
df.head()

Unnamed: 0,username,date,text,tags,translated_text,sentiment,location,lat,long,sentiment_label,KADAA_ID,KADAA_AR,KADAA_EN,MOHAFAZA_ID,MOHAFAZA_AR,MOHAFAZA_EN
0,Lebanon 24,2020-07-12,"#كورونا يتلف رئة ""أبو عزرائيل"" (صورة) #العراق#...",#كورونا;#العراق;#lebanon24,"Corona damages the lung of ""Abu Azrael"" (photo...",0.0,صور,33.2733,35.1939,Neutral,83000,صور,Sour,8,الجنوب,South
1,Sana,2020-07-12,حضرة اساتذة القانون بالجامعة اللبنانية المحترم...,#كورونا;#امتحانات_الموت,Honorable professors of law at the Lebanese Un...,0.325,العربانية,33.8772,35.6786,Positive,51000,بعبدا,Baabda,5,جبل لبنان,Mount Lebanon
2,Jaras Scoop FM,2020-07-12,"كشفت ""رابطة طلاب الجامعة اللبنانية"" عبر تويتر ...",#كورونا,"The ""Lebanese University Students Association""...",-0.3,طلصا,33.4833,35.3471,Negative,81000,صيدا,Saida,8,الجنوب,South
3,Lebanon Debate,2020-07-12,"حالة ""#كورونا"" جديدة في الجامعة اللبنانية - ال...",#كورونا,"New ""# Corona"" case at the Lebanese University...",0.136364,حاقل,34.1705,35.7501,Positive,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon
4,عاجل - قناة فلسطين اليوم,2020-07-12,#عاجلمدير مكتب إعلام الأسرى ناهد الفاخوري: حذر...,#كورونا;#عاجل,#UrgentThe director of the Prisoners' Informat...,-0.283333,مكاتبة,33.5628,35.3789,Negative,81000,صيدا,Saida,8,الجنوب,South


# get sentiment

In [25]:
def get_sentiment_label(sentiment):
    if sentiment == 0:
        return 'Neutral'
    return 'Positive' if sentiment > 0 else 'Negative'

def get_sentiment(text):
    sentiment = TextBlob(text).sentiment.polarity
    return pd.Series([sentiment, get_sentiment_label(sentiment)])

In [26]:
df[['sentiment', 'sentiment_label']] = df.translated_text.apply(get_sentiment)

In [27]:
df.sentiment_label.value_counts()

Neutral     22328
Positive    20680
Negative     8675
Name: sentiment_label, dtype: int64

# how any() and all() works

In [29]:
any([])

False

In [30]:
any([True, False, False])

True

In [31]:
any([False, False, False])

False

In [32]:
all([])

True

In [33]:
all([True, False, False])

False

In [34]:
all([False, False, False])

False

In [35]:
all([True, True, True])

True

# keywords about cases

In [36]:
keywords = ['cases', 'daily', 'report', 'اصابات', 'اصابة', 'حالات', 'حالة', 'اليومي', 'التقرير']

# get neutral rows

In [37]:
neutrals = df[df.sentiment_label == 'Neutral']
len(neutrals)

22328

# neutral containing keywords

In [38]:
neutrals_keywords = neutrals[neutrals.text.apply(lambda text: any([keyword for keyword in text.split() if keyword in keywords]))]
len(neutrals_keywords)

889

In [39]:
neutrals_keywords.sentiment

10       0.0
35       0.0
37       0.0
64       0.0
79       0.0
        ... 
50963    0.0
51229    0.0
51329    0.0
51359    0.0
51433    0.0
Name: sentiment, Length: 889, dtype: float64

# split by date

In [41]:
neutrals_keywords[['year', 'month', 'day']] = neutrals_keywords.date.str.split('-', expand=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [46]:
neutrals_keywords

Unnamed: 0,username,date,text,tags,translated_text,sentiment,location,lat,long,sentiment_label,KADAA_ID,KADAA_AR,KADAA_EN,MOHAFAZA_ID,MOHAFAZA_AR,MOHAFAZA_EN,year,month,day
10,Lebanon 24,2020-07-12,"حالة ""#كورونا"" بكلية الصحة - مجمّع الحدث #leba...",#كورونا;#lebanon24,"""Corona"" case at the Faculty of Health - event...",0.0,صور,33.2733,35.1939,Neutral,83000,صور,Sour,8,الجنوب,South,2020,07,12
35,Andre Dagher,2020-07-12,86 حالة #كورونا في #لبنان اليوم !!! بتتذكرو و...,#لبنان;#كورونا,86 cases of Corona in Lebanon today !!! Do you...,0.0,حاقل,34.1705,35.7501,Neutral,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon,2020,07,12
37,Rima Saliba,2020-07-12,86 حالة #كورونا اليوم يا جماعة عم تفلت الامور ...,#كورونا;#بلا_مخ,86 case # Corona today uncle group things get ...,0.0,حاقل,34.1705,35.7501,Neutral,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon,2020,07,12
64,Mohamad chehadeh,2020-07-10,مع ازدياد حالات #كورونا في لبنان بشكل كبيرثمة ...,#كورونا,With the increase of # Corona cases in Lebanon...,0.0,معاد,34.1950,35.6811,Neutral,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon,2020,07,10
79,Al Joumhouria,2020-07-10,عدد اصابات #كورونا يفوق الرقم المعلن عنه!https...,#كورونا,The number of injuries # Corona exceeds the nu...,0.0,موليد,34.3950,35.9758,Neutral,77000,المنية-الضنّية,El Minieh-Dennieh,7,الشمال,North,2020,07,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50963,youssefabdlnabi,2020-03-09,٤١ حالة ٤ منها حرجة ،، \n#كورونا_لبنان,#كورونا_لبنان,"41 cases, 4 of which are critical.\n# Corona_L...",0.0,العصبة,34.1872,35.7886,Neutral,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon,2020,03,09
51229,danydiab1,2020-02-21,إعلان حالة الطوارئ واجب على الدولة كي لا يتفشّ...,#كورونا;#لبنان,It is the duty of the state to declare a state...,0.0,كلش,34.1192,35.7317,Neutral,56000,جبيل,Jbeil,5,جبل لبنان,Mount Lebanon,2020,02,21
51329,josephhabibi_md,2020-03-29,"Back In my hometown #miziara in #Lebanon 🇱🇧, t...",#miziara;#lebanon;#virtual;#proud,"Back In my hometown #miziara in #Lebanon 🇱🇧, t...",0.0,beino,34.5378,36.1789,Neutral,11000,عكّار,Akkar,1,عكار,Akkar,2020,03,29
51359,mamedlej,2020-03-19,Thank you for all the efforts you deploy in ou...,,Thank you for all the efforts you deploy in ou...,0.0,بيروت,33.8719,35.5097,Neutral,31000,بيروت,Beirut,3,بيروت,Beirut,2020,03,19


# group by month

In [48]:
sorted(neutrals_keywords.month.unique())

['01', '02', '03', '04', '05', '06', '07', '08']

In [53]:
month_groups = neutrals_keywords.groupby('month')
print('Cases Data over months & days')
print('******************************')
for month, month_data in month_groups:
    print('[Month]:', month)
    print('[Days]:', sorted(month_data.day.unique()))
    print('------------------------------------------')

Cases Data over months & days
******************************
[Month]: 01
[Days]: ['03', '04', '07', '10', '16', '21', '22', '27', '29', '30', '31']
------------------------------------------
[Month]: 02
[Days]: ['07', '08', '19', '21', '22', '23', '25', '26', '27', '28', '29']
------------------------------------------
[Month]: 03
[Days]: ['02', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']
------------------------------------------
[Month]: 04
[Days]: ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']
------------------------------------------
[Month]: 05
[Days]: ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '3

# get average cases per month

In [55]:
import re

def get_average_per_month(data):
    pattern = re.compile(r'\d+')
    for d in data:
        numbers = re.findall(pattern, d)
        if numbers:
            print(numbers)

for month, month_data in month_groups:
    print(month, '==>', get_average_per_month(month_data.text))
    print('-----------------------------------')

['4']
['3']
['20181004']
['23', '72', '2', '35']
['3']
['3', '2', '80', '99']
['15']
['3000', '2019', '900', '1000', '500', '450', '60', '150']
['3']
['10', '9']
['٧', '1223358139669463040']
['٤', '3']
01 ==> None
-----------------------------------
['19', '1943']
['19']
['8']
['15']
['1', '8', '3']
['1', '8', '3']
['2', '10', '000', '400']
['2']
['2']
['2', '8', '3']
['19', '٣١٦', '٧']
['24', '1233543355604328455']
['19']
['32', '7', '24', '1232089635888779266']
['٨', '٦', '١']
['8']
['20']
['15']
['١٣٣']
['24', '1231001859600592896']
['٢٠٠٠']
02 ==> None
-----------------------------------
['٢٧']
['1929']
['٢٣', '٢٠٢٠', '٢٦٧', '21', '19']
['١٢', '٢٠']
['٧٧', '19']
['٤١', '2019']
['٣٢', '٣']
['٢٤']
['13']
['٤٦٣']
['463', '17', '12', '19']
['31', '3', '2020', '463', '17']
['45', '3117']
['17']
['٤٤٦']
['446', '11']
['٨', '٤٤٦', '١١', '٣٢']
['٤٣٨', '٣٩٨']
['19', '438']
['438', '10']
['438']
['٤٣٨']
['10']
['2019']
['9']
['٧', '20']
['٤١٢']
['٤١٢']
['٤١٢']
['3']
['٢٣', '٢٧']
['391', '23'