In [10]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

pd.options.mode.chained_assignment = 'warn'

UNREST_COLUMNS = ["EVENT_ID_CNTY", 
                    "EVENT_DATE", 
                    "EVENT_TYPE", 
                    "REGION", 
                "FATALITIES",
                "TIMESTAMP"]

CASES_COLUMNS = ["iso_code",
                "continent",
                "location", 
                "date", 
                "total_cases", 
                "new_cases", 
                "total_deaths", 
                "reproduction_rate", 
                "hosp_patients", 
                "positive_rate", 
                "stringency_index", 
                "population",
                "median_age",
                "gdp_per_capita",
                "life_expectancy",
            ]

def serialize(dataFrame, column):
    return [x for x in range(len(dataFrame.loc[:, column].unique()))]

def replaceDict(dataFrame, column):
    vals = serialize(dataFrame, column)
    return dict(zip(dataFrame.loc[:, column].unique(), vals))

def multiSearch(df, column, searchTerms):
    if type(searchTerms) is list:
        return df.query(' | '.join(
            [f'{column} == "{term}"' for term in searchTerms]
        ))
    elif type(searchTerms) is str:
        return df.query(f'{column} == "{searchTerms}"')
    else:
        return df.query(f'{column} == {searchTerms}')

def multiContains(df, column, searchTerms):
    if type(searchTerms) is list:
        return df[df.loc[:, column].str.contains('|'.join(searchTerms))]
    else:
        return df[df.loc[:, column].str.contains(searchTerms)]

#Create the training data set of merged PD's and the result
def retrieveTrainingData(isoCodes=None):
    unrest_df = pd.read_csv("./coronavirus_Oct31.csv")
    unrest_df = unrest_df[unrest_df.loc[:, "EVENT_TYPE"] != 'Strategic developments']
 
    covid_cases_df = pd.read_csv("./owid-covid-data.csv")
    covid_cases_df = covid_cases_df[covid_cases_df.loc[:, "iso_code"] != "OWID_WRL"]
    covid_cases_df.dropna() 

    classes = unrest_df.EVENT_TYPE.unique()
    print(classes)

    unrest_df = unrest_df.loc[:, unrest_df.columns.intersection(UNREST_COLUMNS)]
    covid_cases_df = covid_cases_df.loc[:, covid_cases_df.columns.intersection(CASES_COLUMNS)]

    if (isoCodes == None):
      unrest = unrest_df
      cases = covid_cases_df
    else:
      unrest = multiContains(unrest_df, "EVENT_ID_CNTY", isoCodes)
      cases = multiSearch(covid_cases_df, 'iso_code', isoCodes)

    unrest.loc[:, "EVENT_DATE"] = pd.to_datetime(unrest.loc[:, "EVENT_DATE"])
    cases.loc[:, "date"] = pd.to_datetime(cases.loc[:, "date"]) 

    merge = unrest.merge(cases, how="inner", left_on="EVENT_DATE", right_on="date")

    merge = merge.drop(['EVENT_ID_CNTY'], axis=1)
    merge = merge.drop_duplicates()

    issueType = merge['EVENT_TYPE']
    issueType = issueType.replace(replaceDict(unrest_df, "EVENT_TYPE"))

    merge = merge.drop(['EVENT_TYPE', 'EVENT_DATE', 'REGION', 'iso_code', 'continent', 'location', 'date', 'TIMESTAMP', "FATALITIES"], axis=1).fillna(0)

    return merge, issueType, classes

data, target, classes = retrieveTrainingData()

total_value = zip(data, target)

#the data is fairly imbalanced so use random overfitting first to incrase ratios of minor classes
total_value = zip(data, target)

x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
model = MLPClassifier(hidden_layer_sizes=5, alpha=1.0/5).fit(x_train, y_train)
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)

print("training\n", classification_report(y_train, y_train_pred))
print(confusion_matrix(y_train, y_train_pred))

print("test\n", classification_report(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))


['Protests' 'Riots' 'Battles' 'Violence against civilians'
 'Explosions/Remote violence']
0     1
1     1
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    1
11    1
12    0
13    0
14    0
15    0
16    0
17    0
18    2
19    2
Name: EVENT_TYPE, dtype: int64
training
               precision    recall  f1-score   support

           0       0.62      1.00      0.77        10
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00         2

    accuracy                           0.62        16
   macro avg       0.21      0.33      0.26        16
weighted avg       0.39      0.62      0.48        16

[[10  0  0]
 [ 4  0  0]
 [ 2  0  0]]
test
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

[[4]]


In [11]:
import matplotlib.pyplot as plt

# plt.rc()

ModuleNotFoundError: No module named 'matplotlib'