In [None]:
import pandas as pd
import numpy as np

from sklearn.dummy import DummyClassifier


from sklearn.metrics import confusion_matrix, plot_confusion_matrix,\
    precision_score, recall_score, accuracy_score, f1_score, log_loss,\
    roc_curve, roc_auc_score, classification_report

In [None]:
# Load the training data and store the respondent zip code as text.
# NOTE(review): the cast happens after parsing, so zip codes parsed as numbers
# have already lost any leading zero (e.g. 07055 -> "7055") - confirm whether
# that matters before changing it; later cells fix rows by hard-coded index.
df = pd.read_csv("df_train.csv", low_memory = False).astype(
    {'Respondent Address (Zip Code)': str}
)

In [None]:
df.shape

In [None]:
df.head(2)

In [None]:
# Remove the leftover index column written by an earlier to_csv call.
df = df.drop(columns=["Unnamed: 0"])

In [None]:
def fill_na(column, frame=None):
    """Replace missing values with the string 'UNKNOWN' in the given columns.

    Parameters
    ----------
    column : iterable of str
        Names of the columns to fill.
    frame : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        The first two rows of the filled frame, as a quick visual check.
    """
    if frame is None:
        frame = df

    for x in column:
        # Assign the filled column back instead of calling
        # frame[x].fillna(..., inplace=True): the in-place call acts on a
        # temporary Series and does not update the frame under copy-on-write.
        frame[x] = frame[x].fillna('UNKNOWN')

    return frame.head(2)

In [None]:
# Fill every column of the training frame, not just a subset.
column_names = df.columns.tolist()
fill_na(column_names)

In [None]:
# check to see that is the case
df.info()

In [None]:
df["Charge #2: Infraction Amount"].value_counts().head(3)

## Bring in neighborhood level income data from the census 

1. The main problem is that the USPS zip code (which is what the main df has) is different from the census's ZIP Code Tabulation Area (ZCTA) number, though a significant portion of them match exactly. For example, an address's USPS zip code could be 11333 while its ZCTA is 11332.

2. The following steps are needed to match the census data to the main dataframe:

     A. Add a new column with ZIP Code Tabulation Areas (ZCTAs) so that census data can be pulled by ZCTA.
     
     B. Web-scrape the census site to download the income table for each ZCTA found in the dataframe.
     
     C. Merge the dataframes together so that each row contains neighborhood-level income data.

In [None]:
# Zip codes used as the "NYC" filter on the zip-to-ZCTA crosswalk further down.
# They are kept as strings because the crosswalk's ZIP_CODE column is cast to
# str before the isin() comparison.
nyc_zip_codes = ["10001", "10002", "10003", "10004", "10005", "10006",
                 "10007","10009","10010","10011","10012","10013","10014",
                 "10015","10016","10017","10018","10019","10020","10021",
                 "10022","10023","10024","10025","10026","10027","10028",
                 "10029","10030","10031","10032","10033","10034","10035",
                 "10036","10037","10038","10039","10040","10041","10044",
                 "10045","10048","10055","10060","10069","10090","10095",
                 "10098","10099","10103","10104","10105","10106","10107",
                 "10110","10111","10112","10115","10118","10119","10120",
                 "10121","10122","10123","10128","10151","10152","10153",
                 "10154","10155","10158","10161","10162","10165","10166",
                 "10167","10168","10169","10170","10171","10172","10173",
                 "10174","10175","10176","10177","10178","10199","10270",
                 "10271","10278","10279","10280","10281","10282","10301",
                 "10302","10303","10304","10305","10306","10307","10308",
                 "10309","10310","10311","10312","10314","10451","10452",
                 "10453","10454","10455","10456","10457","10458","10459",
                 "10460","10461","10462","10463","10464","10465","10466",
                 "10467","10468","10469","10470","10471","10472","10473",
                 "10474","10475","11004","11101","11102","11103","11104",
                 "11105","11106","11109","11201","11203","11204","11205",
                 "11206","11207","11208","11209","11210","11211","11212",
                 "11213","11214","11215","11216","11217","11218","11219",
                 "11220","11221","11222","11223","11224","11225","11226",
                 "11228","11229","11230","11231","11232","11233","11234",
                 "11235","11236","11237","11238","11239","11241","11242",
                 "11243","11249","11252","11256","11351","11354","11355",
                 "11356","11357","11358","11359","11360","11361","11362",
                 "11363","11364","11365","11366","11367","11368","11369",
                 "11370","11371","11372","11373","11374","11375","11377",
                 "11378","11379","11385","11411","11412","11413","11414",
                 "11415","11416","11417","11418","11419","11420","11421",
                 "11422","11423","11426","11427","11428","11429","11430",
                 "11432","11433","11434","11435","11436","11691","11692",
                 "11693","11694","11697"]


In [None]:
# The crosswalk file lists USPS zip codes alongside their census ZCTA codes
# (per the original author's note it covers several states and gives the
# equivalent ZCTA where the two codes differ).

# Keep both zip columns as text so they compare cleanly with nyc_zip_codes.
df['Respondent Address (Zip Code)'] = df['Respondent Address (Zip Code)'].astype(str)
ZiptoZcta_Crosswalk_2021 = pd.read_excel("ZiptoZcta_Crosswalk_2021.xlsx").astype({"ZIP_CODE": str})

# Label each crosswalk row as "NYC" when its zip code is in the NYC list above,
# otherwise "Other".
is_nyc_zip = ZiptoZcta_Crosswalk_2021["ZIP_CODE"].isin(nyc_zip_codes)
ZiptoZcta_Crosswalk_2021["ZIP_CODE_NYC"] = np.where(is_nyc_zip, "NYC", "Other")

In [None]:
# How many crosswalk rows were labelled NYC vs. Other.
print(ZiptoZcta_Crosswalk_2021["ZIP_CODE_NYC"].value_counts())

# Keep only the rows labelled NYC.
nyc_rows = ZiptoZcta_Crosswalk_2021["ZIP_CODE_NYC"].eq("NYC")
ZiptoZcta_Crosswalk_2021_NYC = ZiptoZcta_Crosswalk_2021[nyc_rows]
ZiptoZcta_Crosswalk_2021_NYC

In [None]:
ZiptoZcta_Crosswalk_2021_NYC.to_csv("NYC_Only_ZiptoZcta_Crosswalk_2021.csv")

In [None]:
# Lookup table: USPS zip code (key) -> ZCTA code (value), used to map the
# respondent zip codes onto census areas.
ZiptoZcta_Crosswalk_2021_NYC_dict = {
    usps_zip: zcta
    for usps_zip, zcta in zip(
        ZiptoZcta_Crosswalk_2021_NYC["ZIP_CODE"],
        ZiptoZcta_Crosswalk_2021_NYC["ZCTA"],
    )
}
ZiptoZcta_Crosswalk_2021_NYC_dict

In [None]:
#using the key above, if the respondent address column matches the key, then the new column will match it with the value
df['Respondent ZCTA'] = df['Respondent Address (Zip Code)'].map(ZiptoZcta_Crosswalk_2021_NYC_dict)

In [None]:
df["Respondent ZCTA"].isnull().value_counts()

In [None]:
# Store the mapped ZCTA as text. Zip codes with no crosswalk match were NaN
# after the mapping step and become the literal string "nan" here.
# NOTE(review): if the crosswalk's ZCTA column is numeric, the mapped values
# are floats and this cast yields strings like "11229.0" - confirm the dtype.
df['Respondent ZCTA'] = df["Respondent ZCTA"].astype(str)

# Sorted so that the scraping order is the same on every kernel restart
# (iteration order of a set of strings is not stable across sessions).
Respondent_ZCTA_list = sorted(set(df['Respondent ZCTA']))

summary_message = (
    "Respondent ZCTA column contains a total {} unique zip codes. "
    "I will use this list to do web scraping to get the relevant census files"
)
print(summary_message.format(len(Respondent_ZCTA_list)))
print(Respondent_ZCTA_list)

# Web Scraping

In [None]:
pip install selenium

In [None]:
pip install chromedriver-binary

In [None]:
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup
import chromedriver_binary
#import get to call a get request on the site
from requests import get
import re

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import chromedriver_binary

# No machine-specific driver path: per chromedriver-binary's documented usage,
# importing chromedriver_binary (done above) puts its chromedriver on PATH,
# where webdriver.Chrome() finds it.
driver = webdriver.Chrome()

In [None]:
# One census table URL (ACS 5-year 2019, table S1901) per ZCTA in the list.
ZCTA_url = [
    "https://data.census.gov/cedsci/table?q=mean%20income&g=8600000US" + zcta + "&tid=ACSST5Y2019.S1901&hidePreview=true"
    for zcta in Respondent_ZCTA_list
]

ZCTA_url

In [None]:
# Open each ZCTA's census table page and click through the three download
# buttons so the browser saves the table as a file.
import time

from selenium.webdriver.common.by import By

# Seconds to pause after every click so the page/dialog can finish loading.
DOWNLOAD_WAIT_SECONDS = 60

# XPaths of the buttons to press, in click order.
DOWNLOAD_BUTTON_XPATHS = [
    "//*[text() = 'Download']",
    "//button[@class='aqua-button mt-5']",
    '//*[@id="table-download-now-button"]',
]

# NOTE(review): the original loop iterated over `Respondent_ZCTA_list_39`,
# which is not defined anywhere in this notebook (presumably a batch created
# in a since-deleted cell). The full list is used here - confirm.
for x in Respondent_ZCTA_list:
    if x == "nan":
        # Zip codes with no ZCTA match were cast to the string "nan";
        # there is no census page for them.
        continue

    link = "https://data.census.gov/cedsci/table?q=mean%20income&g=8600000US" + x + "&tid=ACSST5Y2019.S1901&hidePreview=true"
    driver.get(link)

    for button_xpath in DOWNLOAD_BUTTON_XPATHS:
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # newer Selenium releases no longer provide.
        driver.find_element(By.XPATH, button_xpath).click()
        time.sleep(DOWNLOAD_WAIT_SECONDS)

# load census data

In [None]:
# Read the downloaded census files ("1ACS.csv" ... "189ACS.csv") and stack
# them into a single dataframe.
census_frames = [pd.read_csv(f"{file_number}ACS.csv") for file_number in range(1, 190)]

zcta_df = (
    # pd.concat stacks the frames row-wise (one below the other) by default.
    pd.concat(census_frames)
    # Rows repeated across files are kept only once.
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
    # Per the original note, the row labelled 2 is an observation for the
    # entire country, so it is removed. NOTE(review): depends on file order.
    .drop(2)
)

# Promote the first remaining row to be the column header.
new_header = zcta_df.iloc[0]
zcta_df = zcta_df[1:]
zcta_df.columns = new_header

# Make sure the frame is in good shape.
zcta_df.head(5)

In [None]:
# Reduce "Geographic Area Name" to the bare code so it can serve as the merge
# key: remove the 'ZCTA5' text, cast to str, then strip leading whitespace.
zcta_df["Geographic Area Name"] = (
    zcta_df["Geographic Area Name"]
    .str.replace('ZCTA5', '')
    .astype(str)
    .str.lstrip()
)

In [None]:
# Print every census column name, one per line.
for column_name in zcta_df.columns:
    print(column_name)


In [None]:
# The census frame has many columns; keep the area name plus the same eight
# income measures for nonfamily households and for all households.
columns = ['Geographic Area Name'] + [
    f"Estimate!!{household_type}!!{income_measure}"
    for household_type in ("Nonfamily households", "Households")
    for income_measure in (
        "Median income (dollars)",
        "Mean income (dollars)",
        "Total!!Less than $10,000",
        "Total!!$10,000 to $14,999",
        "Total!!$15,000 to $24,999",
        "Total!!$25,000 to $34,999",
        "Total!!$35,000 to $49,999",
        "Total!!$50,000 to $74,999",
    )
]
           

In [None]:
# Narrow the census frame to the selected columns, in the order listed.
zcta_df = zcta_df.loc[:, columns]
zcta_df.head(8)

In [None]:
zcta_df = zcta_df.rename(columns={"Geographic Area Name": 'Respondent Address (Zip Code)'})

In [None]:
zcta_df.iloc[0,0]

In [None]:
df.iloc[7550,17]

# merge main df with the census df

In [None]:
# Attach the census income columns to the training rows by zip code.
# how="outer" keeps keys from both frames, so a census zip code with no
# training row still produces a row in the result.
# NOTE(review): the join key is the raw respondent zip column, not the
# 'Respondent ZCTA' column built earlier - confirm this is intended.
# NOTE(review): later cells correct individual rows by hard-coded index label,
# so changing the join type or key here would silently retarget those fixes.
merged_train_df = pd.merge(df, zcta_df, how="outer", on=["Respondent Address (Zip Code)"])

In [None]:
merged_train_df.tail(1)

In [None]:
# Remove the final row of the merged frame (dropped by its index label).
# NOTE(review): presumably the census-only row added by the outer merge shown
# by tail(1) above - confirm before re-running on different data.
merged_train_df = merged_train_df.drop(index=merged_train_df.index[-1:])

In [None]:
merged_train_df.shape

In [None]:
# Put the target column first so it is easy to see next to the features.
target_first_order = ['Hearing Result'] + [
    column_name for column_name in merged_train_df.columns if column_name != 'Hearing Result'
]
merged_train_df = merged_train_df[target_first_order]
merged_train_df.head(2)

In [None]:
merged_train_df["Hearing Result"].value_counts(normalize=True)

In [None]:
merged_train_df.info()

In [None]:
# Census income columns whose missing values get filled after the merge:
# the same eight measures for nonfamily households and for all households.
columns_fill_na = [
    f"Estimate!!{household_type}!!{income_measure}"
    for household_type in ("Nonfamily households", "Households")
    for income_measure in (
        "Median income (dollars)",
        "Mean income (dollars)",
        "Total!!Less than $10,000",
        "Total!!$10,000 to $14,999",
        "Total!!$15,000 to $24,999",
        "Total!!$25,000 to $34,999",
        "Total!!$35,000 to $49,999",
        "Total!!$50,000 to $74,999",
    )
]

In [None]:
def fill_na(column, frame=None):
    """Replace missing values with the string 'UNKNOWN' in the given columns.

    Note: this redefinition replaces the notebook's earlier ``fill_na`` (which
    targets ``df``); from this cell on the default target is
    ``merged_train_df``.

    Parameters
    ----------
    column : iterable of str
        Names of the columns to fill.
    frame : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the notebook-level
        ``merged_train_df``.

    Returns
    -------
    pandas.DataFrame
        The first two rows of the filled frame, as a quick visual check.
    """
    if frame is None:
        frame = merged_train_df

    for x in column:
        # Assign the filled column back instead of calling
        # frame[x].fillna(..., inplace=True): the in-place call acts on a
        # temporary Series and does not update the frame under copy-on-write.
        frame[x] = frame[x].fillna('UNKNOWN')

    return frame.head(2)

In [None]:
fill_na(columns_fill_na)

In [None]:
merged_train_df.info()

In [None]:
merged_train_df["Violation Location (Borough)"].value_counts()

In [None]:
# Manual corrections: each row in the unknown-borough category was looked up
# by its address and assigned the right borough. Rows are addressed by index
# label, so these numbers are only valid for this exact merged frame.
borough_corrections = {
    "MANHATTAN": [5149, 56147, 113291, 149645, 187366, 95107, 194000, 211902],
    "QUEENS": [52891, 100964, 183982, 201329],
    # NOTE(review): 7278 sits next to 72781 - confirm it is not a truncated label.
    "STATEN IS": [59790, 72781, 60756, 7278, 73248, 73613, 206307],
}

for borough, row_labels in borough_corrections.items():
    for row_label in row_labels:
        merged_train_df.at[row_label, "Violation Location (Borough)"] = borough

# Manual zip code corrections, keyed by row index label.
zip_code_corrections = {
    48408: 10038,
    210827: 11559,
    210873: 11692,
    210879: 11233,
    210892: 11692,
}

for row_label, zip_code in zip_code_corrections.items():
    merged_train_df.at[row_label, "Violation Location (Zip Code)"] = zip_code

In [None]:
#check to make sure this column is corrected
merged_train_df["Violation Location (Borough)"].value_counts()

In [None]:
merged_train_df["Violation Location (Zip Code)"].value_counts()

In [None]:
merged_train_df["Respondent Address (Zip Code)"].value_counts()



In [None]:
# Working notes: unusual respondent address zip codes pulled out of the value
# counts above. They are kept as comments because, as bare text, this cell is
# not valid Python and stops the notebook from running top to bottom.
# Where the author recorded a corrected value it follows the "=" sign.
#
# 117792218 = 11779
#
# 117571211 = 11757
# 117433914
# 112212517
# 117576451
# 117983702
# 117274069
# 105471054
# 7055 =07055 NJ
# 7102 ==07102 NJ
# 107012849
# 8879 == 08879NJ
# 1008 == unclear delete unknown
# 115424211
# 104576724
# 101
# 7506
# 117573534
# 117033221
# 107
# 116913065
# 100103202
# 115544540
# 8610
# 115201726
# 2908
# 103010468
# 112170022
# 7083
# 116931854
# 1226
# 113733607
# 7166
# 7050
# 7036
# 116914809
# 103
# 7011
# 7112
# 7043
# 117691823
# 107103211
# 117530754
# 115503908
# 1915
# 109181420
# 110035033
# 8701
# 115503445
# 7047
# 7073
# 112041721
# 116920311
# 116900171
# 117794358
# 7014
# 116972206
# 110
# 113551701
# 112324228
# 115581926
# 7065
# 115504822
# 116941710
# 112
# 7054
# 111
# 7033
# 752
# 100290289
# 112321637
# 125949759
# 112171006
# 112022783
# 117061409
# 100
# 196
# 7307
# 7303
# 1105
# 7105
# 116931498
# 7631
# 113
# 7032
# 7726
# 116915609
# 100302472
# 115012242
# 112181121
# 107042262
# 115532021
# 116932127
# 105531607
# 115504718
# 116972203
# 7304
# 1120
# 112181852
# 11
# 117252116
# 104
# 7306
# 110961359

In [None]:
merged_train_df["Decision Location (Borough)"].value_counts()

In [None]:
# Based on the value counts above, group the raw decision-location codes into
# cleaner labels.
decision_location_labels = {
    'UNKNOWN': "UNKNOWN",
    'MANHATTAN': 'In Person MANHATTAN',
    'BROOKLYN': "In Person BROOKLYN",
    'QUEENS': "In Person QUEENS",
    'BRONX': "In Person BRONX",
    'SAU: MANH': "By Mail MANHATTAN",
    'BY PHONE': "BY PHONE",
    'STATEN IS': "STATEN IS",
    'LONG ISLA': "In Person QUEENS",
    'SAU: BX': "By Mail BRONX",
    'SAU: BKLN': "By Mail BKLN",
    'SAU: LIC': "By Mail QUEENS",
    'ONE-CLICK ': "Electronic",
}

# Values missing from the table are kept as they are. With a plain
# .map(dict) they would become NaN, which also meant that running this cell a
# second time (when the column already holds the new labels) wiped most of it.
merged_train_df['Decision Location (Borough)'] = merged_train_df['Decision Location (Borough)'].map(
    lambda raw_label: decision_location_labels.get(raw_label, raw_label)
)

In [None]:
merged_train_df["Decision Location (Borough)"].value_counts()

In [None]:
# Column means over the rows whose penalty is known. numeric_only=True limits
# the result to numeric columns; the frame also holds text columns (including
# the "UNKNOWN" placeholder), for which a mean is undefined.
merged_train_df[merged_train_df["Penalty Imposed"] != "UNKNOWN"].mean(numeric_only=True)

In [None]:
# testing  (stray scratch cell; the bare name raised NameError when run)

In [None]:
# Average paid amount. Non-numeric entries (such as the "UNKNOWN" placeholder
# used when filling missing values) are coerced to NaN and skipped by mean().
pd.to_numeric(merged_train_df["Paid Amount"], errors="coerce").mean()

In [None]:
# Convert the amount columns to numbers and store the result (the original
# astype(int) calls discarded their return value). errors="coerce" turns
# non-numeric entries such as the "UNKNOWN" placeholder into NaN, which an
# integer cast cannot represent.
for amount_column in ["Penalty Imposed", "Paid Amount"]:
    merged_train_df[amount_column] = pd.to_numeric(merged_train_df[amount_column], errors="coerce")

merged_train_df[["Penalty Imposed", "Paid Amount"]].dtypes

In [None]:
# Outstanding balance per row. Both inputs are converted to numbers first so
# the subtraction also works while the columns still hold text; rows with a
# non-numeric amount (e.g. "UNKNOWN") get NaN.
merged_train_df["Penalty Imposed - Paid Amount"] = (
    pd.to_numeric(merged_train_df["Penalty Imposed"], errors="coerce")
    - pd.to_numeric(merged_train_df["Paid Amount"], errors="coerce")
)

In [None]:
# Reference notes pasted from the merged_train_df.info() output: the charge
# columns. Kept as comments because, as bare text, each line evaluated the
# undefined name `Charge` and raised NameError.
#
# Charge #1: Code                                            213243 non-null  object
# Charge #1: Code Section                                    213243 non-null  object
# Charge #1: Code Description                                213243 non-null  object
# Charge #1: Infraction Amount                               213243 non-null  object
# Charge #2: Code                                            213243 non-null  object
# Charge #2: Code Section                                    213243 non-null  object
# Charge #2: Code Description                                213243 non-null  object
# Charge #2: Infraction Amount                               213243 non-null  object
# Charge #3: Code                                            213243 non-null  object
# Charge #3: Code Section                                    213243 non-null  object
# Charge #3: Code Description                                213243 non-null  object
# Charge #3: Infraction Amount

In [None]:
merged_train_df["Charge #1: Code Description"].value_counts()

### Some respondents are individuals and some are commercial entities (indicated on some rows by terms such as "LLC"). Therefore, a separate column is needed to label whether each respondent is a person or not.

In [None]:
# pd.set_option('display.max_rows', 1000000000)
merged_train_df["Respondent First Name"].value_counts()

In [None]:
merged_train_df.shape

In [None]:
# Terms that, when found in the first-name field, point to an organisation
# rather than an individual.
key_words_first_name = [
    "INC",
    "CORP",
    "MANAGEMENT",
    "BUS SERVICE AND TOUR",
    "SCIENCES DIVISION",
    "HOUSING DEVELOPMENT",
]

In [None]:
# Terms that, when found in the last-name field, point to an organisation
# rather than an individual. Every entry must end with a comma: two adjacent
# string literals without one are silently joined into a single entry, which
# is what happened to "AUTO AUCTION"/"FIRST HOME PROP" and to
# "SAM CONEY ISLAND LLC"/"ALL PHASE PLUMBING CORP" before this fix.
key_words_last_name = ["INC", "CORP", "MANAGEMENT","FIRST HOME PROPERTIES",
"COR",
"3 NYC",
"HPENY HOUSING DEVELOPMENT FUND",
"RT HUDSON ELEMENTARY SCHOOL",
"DEVELOPMENT CO",
"HOLDING CO",
"BANANA KELLY HSG DEVE",
"AQUA PROPERTIES",
"THE BROOKLYN UNION GAS CO",
"VANDERBILT MORTGAGE AND FINANC",
"AMERICAN BROKERS CONDUIT",
"CMI BUSINESS FURNITURE",
"FRIENDS LAND DEVELOP",
"HARBOR VIEW PROP LTD",
"INGERSOLL TENANT ASSOC",
"THE BROOKLYN UNION GAS COMPANY ",
"PLAZA CONSTRUCTION",
"AUTO AUCTION",
"FIRST HOME PROP",
"1046 WASHINGTON AVE HDFC",
"DIEGO BEEKMAN MUTUAL HOUSING A",
"REV MANAGEMENT",
"LANDSLIDE PROPERTIES",
"NEIGHBORHOOD RESTORE HOUSING D",
"HTB ENTERPRISES LTD",
"ALLIANCE OF INDIVIDUA",
"WJR PROPERTIES INC",
"WJR PROPERTIES INC",
"KEYSPAN ENERGY DELIVERY NYC",
"RLTY",
"FIRST UNITED MORTGAGE BANKING",
"ASSET PLUSS MANAGEMENT SERVICE",
"KEYSPAN ENERGY DELIVERY N Y C",
"WELLS FARGO HOME MORT",
"ALLIANCE OF INDIVIDUAL",
"NEIGHBORHOOD RESTORE HDFC",
"WILMINGTON SAVINGS FUND SOCIET",
"YOUNG ISRAEL OF AVENUE K",
"FREMONT INVESTMENT LOAN",
"BELL ATLANTIC",
"EM ESS PETROLEUM CORP",
"PI CONSTRUCTION SERVICE INC",
"US BANK NATIONAL ASSOCIATION",
"CONKLIN MGMT CO",
"CON EDISON",
"CONSOLIDATED EDISON",
"EMPIRE CITY SUBWAY",
"DEUTSCHE BANK NATIONAL TRUST C",
"NATIONAL GRID",
"CONTACT HOLDINGS CORP",
"U S BANK NATIONAL ASSOCIATION",
"G G ASSOCIATES",
"WELLS FARGO BANK",
"LUCKY SEAFOOD",
"AGENT OWNER",
"FEDERAL NATIONAL MORTGAGE ASSO",
"AMENCAN HOME MORTGAGE",
"HOMESIDE LENDING",
"HSBC BANK USA",
"HSBC BANK USA NA",
"HIGH STATE RLTY CORP",
"NYC HOUSING AUTHORITY",
"PLAZA CONSTRUCTION CORP",
"EASY STREET PLUMBING INC",
"1249 WEBSTER AVE RLTY",
"DEVELOP", "BANK", "RESOURCES", "SERVICES", "LLC", "SCHOOL", "HOME","NATIONAL GRID","SAM CONEY ISLAND LLC",
                    "ALL PHASE PLUMBING CORP","ERCAT REALTY CORP"]

In [None]:
merged_train_df['Respondent Last Name'] = merged_train_df['Respondent Last Name'].astype(str)

In [None]:
# def word_checker(sentence):
#     if any(word in key_words_last_name for word in sentence.lower().split()):
#         return 'Not Person'
#     else:
#         return 'Person'

In [None]:
# merged_df['Respondent Status'] = merged_df['Respondent Last Name'].apply(word_checker)  

In [None]:
def get_word(my_string, key_words=None):
    """Classify a respondent name as an organisation or an individual.

    Parameters
    ----------
    my_string : str
        The name to inspect (the notebook applies this to the last-name field).
    key_words : iterable of str, optional
        Terms that mark an organisation. Defaults to the notebook-level
        ``key_words_last_name`` list.

    Returns
    -------
    str
        "Not Person" if any key word occurs in ``my_string`` (case-insensitive
        substring match), otherwise "Person".

    Notes
    -----
    The previous version returned inside the first loop iteration, so only the
    first key word was ever tested, and it looped over an undefined
    ``merged_df`` frame. Every key word is now checked.
    Because this is a substring test, short key words also match inside longer
    words (e.g. "INC" is contained in "LINCOLN").
    """
    if key_words is None:
        key_words = key_words_last_name

    lowered_name = my_string.lower()
    if any(word.lower() in lowered_name for word in key_words):
        return "Not Person"
    return "Person"

In [None]:
merged_train_df["Respondent Status"]= merged_train_df["Respondent Last Name"].apply(get_word)

In [None]:
merged_train_df.sample(2)

In [None]:
# `merged_df` is not defined in this notebook; the column was created on
# merged_train_df.
merged_train_df['Respondent Status'].value_counts()

In [None]:
# Spot check: rows whose last name is exactly "NATIONAL GRID".
# (`merged_df` is not defined in this notebook; merged_train_df is the frame in use.)
merged_train_df.loc[merged_train_df['Respondent Last Name'] == "NATIONAL GRID"]

In [None]:
# `merged_df` is not defined in this notebook; merged_train_df is the frame in use.
# NOTE(review): no visible cell creates a "JURISDICTION NAME" column, so
# errors="ignore" keeps this cell from failing (and safe to re-run) when the
# column is absent - confirm whether the drop is still needed.
merged_train_df = merged_train_df.drop(columns="JURISDICTION NAME", errors="ignore")

In [None]:
# `merged_df` is not defined in this notebook; merged_train_df is the frame in use.
merged_train_df.head(1)

In [None]:
# pd.options.display.max_colwidth = 1000000
# pd.set_option('display.max_columns', 2000000000)
# pd.set_option('display.max_rows', 1000000000)
# pd.set_option('display.expand_frame_repr', True)

# FSM

Only one feature is pulled in for now because the columns still need more cleaning work.

In [None]:
# Baseline model: always predicts the most frequent class of the target.
# The same 2-D, single-feature matrix is used for both fit and predict (the
# original fitted on a 1-D column and predicted on the whole frame).
X_baseline = df[["Violation Location (City)"]]
y_baseline = df["Hearing Result"]

dummy_model = DummyClassifier(strategy="most_frequent")
dummy_model.fit(X_baseline, y_baseline)
y_hat = dummy_model.predict(X_baseline)

In [None]:
# Score the baseline predictions against the true hearing results.
y_true = df["Hearing Result"]

acc = accuracy_score(y_true, y_hat)
macro_precision_score = precision_score(y_true, y_hat, average='macro')
micro_precision_score = precision_score(y_true, y_hat, average='micro')
macro_recall_score = recall_score(y_true, y_hat, average='macro')
micro_recall_score = recall_score(y_true, y_hat, average='micro')

print(f'Accuracy Score: {acc}')
print(f'Macro Precision Score: {macro_precision_score}')
print(f'Micro Precision Score: {micro_precision_score}')
print(f'Macro Recall Score: {macro_recall_score}')
print(f'Micro Recall Score: {micro_recall_score}')