In [14]:
import pandas as pd
from matplotlib.pyplot import plot, bar

# config

In [15]:
# Input file with the daily figures and output file for the computed ratios.
read_from = 'corona_lb_data.csv'
# A forward slash resolves to the parent directory on every OS; a backslash
# is only a path separator on Windows and becomes part of the file name elsewhere.
save_to = '../corona_lb_ratios.csv'

# Let pandas display up to 255 rows before truncating the output.
pd.set_option('display.max_rows', 255)

# `display` and `HTML` are public in IPython.display; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
# Widen the notebook container to 80% of the browser window.
display(HTML("<style>.container { width:80% !important; }</style>"))

# read corona lb data

In [16]:
# Load the daily figures from the CSV named by `read_from`; header=0 takes the
# column names from the first line of the file.
corona = pd.read_csv(read_from, header=0)
# Preview the first rows of the loaded frame.
corona.head()

Unnamed: 0,date,year,month,day,new_cases,new_deaths,prev_cases,prev_deaths,total_cases,total_deaths
0,2/1/2020,2020,2,1,0,0,0,0,0,0
1,2/2/2020,2020,2,2,0,0,0,0,0,0
2,2/3/2020,2020,2,3,0,0,0,0,0,0
3,2/4/2020,2020,2,4,0,0,0,0,0,0
4,2/5/2020,2020,2,5,0,0,0,0,0,0


# months data

In [17]:
# Split the daily rows by their `month` value and summarise each group.
month_groups = corona.groupby('month')

# Maps each month value to a dict of integer summary figures for that month.
months_data = {}

for month, month_data in month_groups:
    daily_cases = month_data['new_cases']
    daily_deaths = month_data['new_deaths']

    months_data[month] = {
        # Daily means, truncated to whole numbers by int().
        'average_cases': int(daily_cases.mean()),
        'average_deaths': int(daily_deaths.mean()),
        # Totals reported within the month.
        'month_cases': int(daily_cases.sum()),
        'month_deaths': int(daily_deaths.sum()),
        # Largest running total recorded within the month.
        'comulative_cases': int(month_data['total_cases'].max()),
        'comulative_deaths': int(month_data['total_deaths'].max()),
    }

# sentiment ratio

In [18]:
# Running totals shared by every call to calculate(); they are reset here so
# that re-running this cell starts from a clean state.
cases_day_count = 0
deaths_day_count = 0
cases_diff_sum = 0


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is falsy."""
    if denominator:
        return numerator / denominator
    return 0


def _direct_ratio(prev, new):
    """Ratio of the smaller to the larger of two counts, negative when `new` exceeds `prev`."""
    if prev < new:
        larger, smaller = new, -prev
    else:
        larger, smaller = prev, new
    return _ratio(smaller, larger)


def calculate(row):
    """Compute four sentiment ratios for one daily row.

    The function updates the module-level counters `cases_day_count`,
    `deaths_day_count` and `cases_diff_sum`, so its results depend on the rows
    being passed in order, once each.

    Returns a pd.Series holding, in order: the month-average ratio, the
    cumulative-average ratio, the diff-average ratio and the simple ratio.
    """
    global cases_day_count, deaths_day_count, cases_diff_sum

    new_cases, new_deaths = row.new_cases, row.new_deaths
    prev_cases, prev_deaths = row.prev_cases, row.prev_deaths

    # Count the days seen so far that reported at least one case / one death.
    if new_cases:
        cases_day_count += 1
    if new_deaths:
        deaths_day_count += 1

    # Method 1: today's figures relative to the averages stored for the row's month.
    month_stats = months_data[row.month]
    month_avg_ratio = (
        _ratio(new_cases, month_stats['average_cases'])
        + _ratio(new_deaths, month_stats['average_deaths'])
    ) / 2

    # Method 2: today's figures relative to the running totals averaged over
    # the counted days.
    running_avg_cases = _ratio(row.total_cases, cases_day_count)
    running_avg_deaths = _ratio(row.total_deaths, deaths_day_count)
    cumulative_avg_ratio = (
        _ratio(new_cases, running_avg_cases)
        + _ratio(new_deaths, running_avg_deaths)
    ) / 2

    # Method 3: day-over-day change in cases relative to the average absolute
    # change so far (deaths are not used here).
    cases_change = prev_cases - new_cases
    # NOTE(review): the average is taken before today's change is added to the
    # sum, while the day counter already includes today; confirm this is intended.
    running_avg_change = _ratio(cases_diff_sum, cases_day_count)
    if new_cases:
        cases_diff_sum += abs(cases_change)
    diff_avg_ratio = _ratio(cases_change, running_avg_change)

    # Method 4: direct ratio between yesterday's and today's figures.
    simple_ratio = (
        _direct_ratio(prev_cases, new_cases)
        + _direct_ratio(prev_deaths, new_deaths)
    ) / 2

    return pd.Series([month_avg_ratio, cumulative_avg_ratio, diff_avg_ratio, simple_ratio])


# Apply row by row and store the four results as new columns.
ratio_columns = ['month_avg_ratio', 'cumulative_avg_ratio', 'diff_avg_ratio', 'simple_ratio']
corona[ratio_columns] = corona.apply(calculate, axis=1)

In [19]:
# Summary statistics for the frame, including the four ratio columns added above.
corona.describe()

Unnamed: 0,year,month,day,new_cases,new_deaths,prev_cases,prev_deaths,total_cases,total_deaths,month_avg_ratio,cumulative_avg_ratio,diff_avg_ratio,simple_ratio
count,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0
mean,2020.0,5.353448,15.25431,125.948276,1.228448,121.612069,1.181034,4081.517241,52.935345,0.624169,1.660401,-0.34212,0.04112
std,0.0,2.204891,8.732185,211.031987,2.338176,203.055439,2.248998,6820.027879,61.742309,0.567001,1.665754,2.940496,0.374102
min,2020.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-11.045872,-0.850893
25%,2020.0,3.0,8.0,5.75,0.0,5.0,0.0,455.0,21.0,0.1875,0.328588,-1.633054,-0.253224
50%,2020.0,5.0,15.0,18.0,0.0,17.5,0.0,1165.5,29.0,0.529514,0.9487,0.0,0.0
75%,2020.0,7.0,23.0,142.25,1.25,134.25,1.0,3448.75,61.0,0.902144,2.894445,0.735131,0.329762
max,2020.0,9.0,31.0,1006.0,18.0,779.0,18.0,29220.0,285.0,2.968085,6.502567,8.701205,0.961623


# scale ratios into range [-1, 1] using MinMaxScaler()

In [20]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

def min_max_scale(df, column_name, scale=(-1, 1)):
    """Rescale one column of `df` in place with scikit-learn's MinMaxScaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column; it is modified in place.
    column_name : str
        Name of the column to rescale.
    scale : tuple of (min, max), default (-1, 1)
        Target range, passed to MinMaxScaler as `feature_range`.

    Prints the column's maximum and minimum before scaling. Returns None.
    """
    # Read from the frame that was passed in, not from a notebook-level global.
    values = df[column_name].to_numpy().reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=scale)
    scaled = scaler.fit_transform(values)
    print('Max:', scaler.data_max_)
    print('Min:', scaler.data_min_)
    # Assign the flat array positionally; wrapping it in a new Series would
    # align on a fresh 0..n-1 index and produce NaN for any other index.
    df[column_name] = scaled.ravel()

In [21]:
# Rescale `month_avg_ratio` with the default (-1, 1) range; prints the column's max and min.
min_max_scale(corona, 'month_avg_ratio')

Max: [2.96808511]
Min: [0.]


In [22]:
# Rescale `cumulative_avg_ratio` with the default (-1, 1) range; prints the column's max and min.
min_max_scale(corona, 'cumulative_avg_ratio')

Max: [6.50256651]
Min: [0.]


In [23]:
# Rescale `diff_avg_ratio` with the default (-1, 1) range; prints the column's max and min.
min_max_scale(corona, 'diff_avg_ratio')

Max: [8.70120482]
Min: [-11.04587156]


In [24]:
# Rescale `simple_ratio` with the default (-1, 1) range; prints the column's max and min.
min_max_scale(corona, 'simple_ratio')

Max: [0.96162281]
Min: [-0.85089286]


In [25]:
# Summary statistics again, after the min_max_scale calls on the four ratio columns.
corona.describe()

Unnamed: 0,year,month,day,new_cases,new_deaths,prev_cases,prev_deaths,total_cases,total_deaths,month_avg_ratio,cumulative_avg_ratio,diff_avg_ratio,simple_ratio
count,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0,232.0
mean,2020.0,5.353448,15.25431,125.948276,1.228448,121.612069,1.181034,4081.517241,52.935345,-0.579413,-0.489309,0.084085,-0.015719
std,0.0,2.204891,8.732185,211.031987,2.338176,203.055439,2.248998,6820.027879,61.742309,0.382065,0.512337,0.297816,0.412798
min,2020.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0
25%,2020.0,3.0,8.0,5.75,0.0,5.0,0.0,455.0,21.0,-0.873656,-0.898936,-0.046662,-0.340509
50%,2020.0,5.0,15.0,18.0,0.0,17.5,0.0,1165.5,29.0,-0.643195,-0.708208,0.118735,-0.061092
75%,2020.0,7.0,23.0,142.25,1.25,134.25,1.0,3448.75,61.0,-0.392104,-0.109753,0.19319,0.30278
max,2020.0,9.0,31.0,1006.0,18.0,779.0,18.0,29220.0,285.0,1.0,1.0,1.0,1.0


# save

In [26]:
# Write the frame to the path held in `save_to`; index=False leaves the row index out of the file.
corona.to_csv(save_to, index=False)