In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn import svm

def serialize(dataFrame, column):
    """Return consecutive integer codes, one per distinct value of a column.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame holding the column to inspect.
    column : str
        Label of the column whose distinct values are counted.

    Returns
    -------
    list of int
        ``[0, 1, ..., k - 1]`` where ``k`` is the number of entries returned
        by ``dataFrame[column].unique()``.
    """
    distinct_values = dataFrame[column].unique()
    return list(range(len(distinct_values)))

def replaceDict(dataFrame, column):
    """Build a lookup from each distinct column value to an integer code.

    Codes are assigned in the order the values are returned by
    ``dataFrame[column].unique()``, starting at 0.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame holding the column to encode.
    column : str
        Label of the column whose distinct values become the dict keys.

    Returns
    -------
    dict
        ``{value: code}`` with one entry per distinct value.
    """
    distinct_values = dataFrame[column].unique()
    return {value: code for code, value in enumerate(distinct_values)}



def multiSearch(df, column, searchTerms):
    """Return the rows of ``df`` whose ``column`` equals one of the search terms.

    Values are handed to ``DataFrame.query`` by reference (``@name``) rather
    than being pasted into the expression text, so strings containing quotes
    and non-string values (ints, floats, ...) are compared with their real
    type instead of being coerced to a quoted string.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    column : str
        Column name, written as it would appear in a ``query`` expression.
    searchTerms : list, tuple, set, str or scalar
        A collection selects rows matching *any* of its elements; a single
        string or scalar selects rows equal to that value.

    Returns
    -------
    pandas.DataFrame
        The matching rows. An empty collection yields an empty frame with
        the same columns instead of raising on an empty query expression.
    """
    if isinstance(searchTerms, (list, tuple, set)):
        terms = list(searchTerms)
        if not terms:
            # Nothing to match against: return zero rows, keep the schema.
            return df.iloc[0:0]
        # `in` with a referenced local is a membership test over the column.
        return df.query(f'{column} in @terms')

    # Single value (string or otherwise): plain equality, value by reference.
    return df.query(f'{column} == @searchTerms')

def multiContains(df, column, searchTerms, regex=True):
    """Return the rows of ``df`` whose ``column`` text contains a search term.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    column : str
        Label of a text column.
    searchTerms : list, tuple, set or str
        A collection selects rows containing *any* of its elements; a single
        value is passed straight to ``Series.str.contains``.
    regex : bool, default True
        When True (the original behaviour) terms are regular expressions, so
        characters such as ``.``, ``+`` or ``(`` are pattern syntax. Pass
        False to match every term literally.

    Returns
    -------
    pandas.DataFrame
        The matching rows. Rows whose value is missing never match. An empty
        collection yields an empty frame with the same columns (joining zero
        terms would give the pattern ``''``, which matches every row).
    """
    values = df[column]

    if isinstance(searchTerms, (list, tuple, set)):
        terms = list(searchTerms)
        if not terms:
            return df.iloc[0:0]
        if regex:
            # One alternation pattern covers all terms in a single pass.
            mask = values.str.contains('|'.join(terms), na=False)
        else:
            # Literal matching: OR together one plain-substring test per term.
            mask = values.str.contains(terms[0], regex=False, na=False)
            for term in terms[1:]:
                mask = mask | values.str.contains(term, regex=False, na=False)
    else:
        mask = values.str.contains(searchTerms, regex=regex, na=False)

    # na=False above keeps the mask strictly boolean; a NaN in the mask would
    # make the boolean indexing below raise.
    return df[mask]


# Columns kept from the unrest events CSV; every other column is discarded
# when the frame is trimmed.
UNREST_COLUMNS = [
    "EVENT_ID_CNTY",
    "EVENT_DATE",
    "EVENT_TYPE",
    "REGION",
    "FATALITIES",
]

# Columns kept from the COVID cases CSV; every other column is discarded
# when the frame is trimmed.
CASES_COLUMNS = [
    "iso_code",
    "continent",
    "location",
    "date",
    "total_cases",
    "new_cases",
    "total_deaths",
    "reproduction_rate",
    "hosp_patients",
    "positive_rate",
    "stringency_index",
    "population",
    "median_age",
    "gdp_per_capita",
    "life_expectancy",
]

# --- Load raw data ---------------------------------------------------------
unrest_df = pd.read_csv("./coronavirus_Oct31.csv")
covid_cases_df = pd.read_csv("./owid-covid-data.csv")

# Not used further in this cell; kept in case later cells rely on it.
irl_afg = multiSearch(covid_cases_df, 'iso_code', ['AFG', 'IRL'])

# --- Trim both frames to the columns of interest ---------------------------
# Index.intersection tolerates columns that are absent from the CSV.
unrest_df = unrest_df[unrest_df.columns.intersection(UNREST_COLUMNS)]
covid_cases_df = covid_cases_df[covid_cases_df.columns.intersection(CASES_COLUMNS)]

# --- Select the Afghanistan rows -------------------------------------------
# .copy() gives each subset its own data, so the date conversion below writes
# to an independent frame instead of to a slice of the parent.
unrest_afg = multiContains(unrest_df, "EVENT_ID_CNTY", "AFG").copy()
cases_afg = multiSearch(covid_cases_df, 'iso_code', "AFG").copy()

unrest_afg["EVENT_DATE"] = pd.to_datetime(unrest_afg["EVENT_DATE"])
cases_afg["date"] = pd.to_datetime(cases_afg["date"])

# --- Join events to the case figures of the same day -----------------------
merge = unrest_afg.merge(cases_afg, how="inner", left_on="EVENT_DATE", right_on="date")

# Not used further in this cell; kept in case later cells rely on it.
classification = serialize(unrest_df, "EVENT_TYPE")

# --- Labels -----------------------------------------------------------------
# Labels are read from the merged frame, before EVENT_TYPE is dropped. The
# inner join can drop or repeat event rows, so labels taken from `unrest_afg`
# are not guaranteed to line up row-for-row with the features.
# The code table is built from the full unrest data, so every event type seen
# here has a code.
issueType = merge['EVENT_TYPE'].map(replaceDict(unrest_df, "EVENT_TYPE"))

# --- Features ---------------------------------------------------------------
# Drop identifiers, dates and text columns; remaining gaps are filled with 0.
merge = merge.drop(
    ['EVENT_TYPE', 'EVENT_DATE', 'EVENT_ID_CNTY', 'REGION',
     'iso_code', 'continent', 'location', 'date'],
    axis=1,
).fillna(0)
print(merge)

# Rescale every feature to [0, 1] so no single column dominates the SVM.
min_max_scaler = preprocessing.MinMaxScaler()
mergeNormal = min_max_scaler.fit_transform(merge)
mergeNormal = pd.DataFrame(mergeNormal)

# --- Model ------------------------------------------------------------------
clf = svm.SVC()
clf.fit(mergeNormal, issueType)

# NOTE(review): predictions are made on the training rows themselves, so this
# output shows how well the model fits, not how well it generalises.
clf.predict(mergeNormal)






    FATALITIES  total_cases  new_cases  total_deaths  reproduction_rate  \
0            0         24.0        0.0           0.0               0.00   
1            0         42.0        2.0           1.0               0.00   
2            0         42.0        2.0           1.0               0.00   
3            0         42.0        2.0           1.0               0.00   
4            0         75.0       33.0           1.0               0.00   
5            0         75.0       33.0           1.0               0.00   
6            0         75.0        0.0           1.0               0.00   
7            0         91.0       16.0           2.0               1.42   
8            0        114.0        8.0           4.0               1.44   
9            0        114.0        8.0           4.0               1.44   
10           0        141.0       27.0           4.0               1.44   
11           0        367.0       30.0          11.0               1.42   
12           0        423

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3])