In [7]:
import numpy as np
import pandas as pd

In [13]:
def replace_missing_values(df):
    
    '''
    This function replaces missing values in dates, additional information and source
    
    Input: dataframe
    Output: dataframe
    
    References:
    # https://www.geeksforgeeks.org/python-pandas-dataframe-fillna-to-replace-null-values-in-dataframe/
    # https://stackoverflow.com/questions/38607381/python-pandas-if-the-data-is-nan-then-change-to-be-0-else-change-to-be-1-in-d
    '''
    
    df["date_confirmation"].fillna("19.02.2020", inplace = True) 
    df[["additional_information", "source"]] = df[["additional_information", "source"]].notnull().astype(int)

    return df

<br>

## Training Set

In [15]:
train = pd.read_csv("cases_train.txt")
train.head(10)

Unnamed: 0,age,sex,province,country,latitude,longitude,date_confirmation,additional_information,source,outcome
0,,,Delhi,India,28.61474,77.2091,26.05.2020,,https://twitter.com/CMODelhi/status/1265203060...,recovered
1,,,Uttar Pradesh,India,25.43609,81.84718,20.05.2020,,https://t.me/indiacovid/5222,hospitalized
2,,,Maharashtra,India,18.50422,73.85302,26.05.2020,,https://t.me/indiacovid/5601,hospitalized
3,15-34,female,Baden-Wurttemberg,Germany,48.12086,7.8495,15.03.2020,,,nonhospitalized
4,,,Gujarat,India,23.188409,73.647122,20.05.2020,,https://twitter.com/PIBAhmedabad/status/126311...,hospitalized
5,,,Maharashtra,India,19.2,72.96667,28.05.2020,,https://phdmah.maps.arcgis.com/apps/opsdashboa...,recovered
6,,,Madhya Pradesh,India,22.71622,75.86512,18.05.2020,,https://twitter.com/healthminmp/status/1262379...,hospitalized
7,35-59,male,Bayern,Germany,48.40077,11.74376,27.03.2020,,,nonhospitalized
8,,,Gujarat,India,23.02776,72.60027,24.05.2020,,https://www.deshgujarat.com/2020/05/24/gujarat...,hospitalized
9,,,Ticino,Switzerland,46.298856,8.808264,10.03.2020 - 12.03.2020,,https://www4.ti.ch/area-media/comunicati/detta...,nonhospitalized


In [16]:
train = replace_missing_values(train)
train.head(10)

Unnamed: 0,age,sex,province,country,latitude,longitude,date_confirmation,additional_information,source,outcome
0,,,Delhi,India,28.61474,77.2091,26.05.2020,0,1,recovered
1,,,Uttar Pradesh,India,25.43609,81.84718,20.05.2020,0,1,hospitalized
2,,,Maharashtra,India,18.50422,73.85302,26.05.2020,0,1,hospitalized
3,15-34,female,Baden-Wurttemberg,Germany,48.12086,7.8495,15.03.2020,0,0,nonhospitalized
4,,,Gujarat,India,23.188409,73.647122,20.05.2020,0,1,hospitalized
5,,,Maharashtra,India,19.2,72.96667,28.05.2020,0,1,recovered
6,,,Madhya Pradesh,India,22.71622,75.86512,18.05.2020,0,1,hospitalized
7,35-59,male,Bayern,Germany,48.40077,11.74376,27.03.2020,0,0,nonhospitalized
8,,,Gujarat,India,23.02776,72.60027,24.05.2020,0,1,hospitalized
9,,,Ticino,Switzerland,46.298856,8.808264,10.03.2020 - 12.03.2020,0,1,nonhospitalized


In [17]:
train[["date_confirmation", "additional_information", "source", "outcome"]].isnull().values.any()

False

In [18]:
df1 = train[["province", "country", "date_confirmation"]]
# https://stackoverflow.com/questions/44548721/remove-row-with-null-value-from-pandas-data-frame/44548976
df2 = df1.dropna(how = 'any', axis = 0)
df2.isnull().values.any()
#type(df2.iloc[367632,0])
# df = train[train['date_confirmation'].notna()]
#train["province"].drop_duplicates()

False

In [19]:
#location['Province_State'].fillna('', inplace = True)

def key_generation(df, province, country):
    
    '''
    This function combines province and country names to make a compound key
    
    Input: dataframe, "province name", "country name"
    Output: original dataframe & a new key column
    
    Reference:
    https://www.geeksforgeeks.org/adding-new-column-to-existing-dataframe-in-pandas/
    '''
    
    return df.assign(key = df[province].str.cat(df[country], sep = ', '))

In [20]:
def convert_time(df, time_column):
    
    '''
    This function extracts the necessary date components and converts them to date
    
    Input: dataframe, column
    Output: date column
    
    References:
    # https://stackoverflow.com/questions/13682044/remove-unwanted-parts-from-strings-in-a-column
    # https://stackoverflow.com/questions/40841867/how-to-convert-dd-mm-yyyy-into-yyyy-mm-dd-with-pandas-in-python
    '''
    
    time_extract = df[time_column].map(lambda x: str(x)[:10])
    #train[time_column] = pd.to_datetime(train[time_column].map(lambda x: str(x)[:10]))
    
    df[time_column] = pd.to_datetime(time_extract)
    
    return df

In [21]:
df3 = df2.replace({'country':{"United States":"US"}})

In [22]:
df4 = key_generation(df3, "province", "country")

In [23]:
df5 = convert_time(df4, "date_confirmation")

In [24]:
df5[df5["country"] == "US"]

Unnamed: 0,province,country,date_confirmation,key
155,New Mexico,US,2020-03-21,"New Mexico, US"
300,Florida,US,2020-03-27,"Florida, US"
894,Florida,US,2020-03-27,"Florida, US"
895,Florida,US,2020-03-21,"Florida, US"
968,Nevada,US,2020-03-13,"Nevada, US"
...,...,...,...,...
367028,Maine,US,2020-03-19,"Maine, US"
367037,Florida,US,2020-03-25,"Florida, US"
367271,North Dakota,US,2020-03-19,"North Dakota, US"
367403,Florida,US,2020-03-23,"Florida, US"


<br>

## Test Set

In [25]:
test = pd.read_csv("cases_test.txt")
test.head(5)

Unnamed: 0,age,sex,province,country,latitude,longitude,date_confirmation,additional_information,source,outcome
0,,,Rajasthan,India,24.587,73.69848,11.05.2020,,https://t.me/indiacovid/4565,
1,15-34,male,Bayern,Germany,49.993767,9.66288,09.04.2020,,,
2,,,Maharashtra,India,19.2,72.96667,23.05.2020,,https://phdmah.maps.arcgis.com/apps/opsdashboa...,
3,,,Madhya Pradesh,India,23.18087,75.78816,22.05.2020,,https://twitter.com/JansamparkMP/status/126384...,
4,56,female,Lima,Peru,-12.00395,-77.00695,19.04.2020,,,


In [26]:
test = replace_missing_values(test)
test.head(10)

Unnamed: 0,age,sex,province,country,latitude,longitude,date_confirmation,additional_information,source,outcome
0,,,Rajasthan,India,24.587,73.69848,11.05.2020,0,1,
1,15-34,male,Bayern,Germany,49.993767,9.66288,09.04.2020,0,0,
2,,,Maharashtra,India,19.2,72.96667,23.05.2020,0,1,
3,,,Madhya Pradesh,India,23.18087,75.78816,22.05.2020,0,1,
4,56,female,Lima,Peru,-12.00395,-77.00695,19.04.2020,0,0,
5,,,Andhra Pradesh,India,16.3,80.45,28.05.2020,0,1,
6,49,male,Lima,Peru,-12.078,-77.09587,30.05.2020,0,0,
7,,,Maharashtra,India,18.94017,72.83483,28.05.2020,0,1,
8,72,male,Cebu Province,Philippines,10.33333,123.75,03.06.2020,1,1,
9,,,Uttar Pradesh,India,25.74508,82.68237,25.05.2020,0,1,


In [27]:
d1 = test[["province", "country", "date_confirmation"]]
d2 = df1.dropna(how = 'any', axis = 0)
d2.isnull().values.any() # also can be done for a specific column e.g., d2["province"].isnull().values.any()

False

In [28]:
d3 = d2.replace({'country':{"United States":"US"}})
d4 = key_generation(d3, "province", "country")
d5 = convert_time(d4, "date_confirmation")
d5[d5["country"] == "US"]

Unnamed: 0,province,country,date_confirmation,key
155,New Mexico,US,2020-03-21,"New Mexico, US"
300,Florida,US,2020-03-27,"Florida, US"
894,Florida,US,2020-03-27,"Florida, US"
895,Florida,US,2020-03-21,"Florida, US"
968,Nevada,US,2020-03-13,"Nevada, US"
...,...,...,...,...
367028,Maine,US,2020-03-19,"Maine, US"
367037,Florida,US,2020-03-25,"Florida, US"
367271,North Dakota,US,2020-03-19,"North Dakota, US"
367403,Florida,US,2020-03-23,"Florida, US"


<br>

## Location

In [31]:
def missing_value_by_mean(df, column, key = "key"):
    
    '''
    This function fills in the missing values by group mean using key as the group
    
    Input: dataframe, column to be filled, groupby column name
    Output: column to be filled
    
    Reference:
    # https://stackoverflow.com/questions/19966018/pandas-filling-missing-values-by-mean-in-each-group

    '''
    
    df[column] = df[column].fillna(df.groupby(key)[column].transform("mean"))
    return df[column]

In [32]:
def missing_value_by_country_mean(df, column, key = "Country_Region"):
    
    '''
    This function fills in the missing values by group mean using country as the group
    
    Input: dataframe, column to be filled, groupby column name
    Output: column to be filled
    
    Reference:
    # https://stackoverflow.com/questions/19966018/pandas-filling-missing-values-by-mean-in-each-group

    '''
    
    df[column] = df[column].fillna(df.groupby(key)[column].transform("mean"))
    return df[column]

In [41]:
location = pd.read_csv("location.txt")
del location["Case-Fatality_Ratio"]
del location["Active"]
location.head(3)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate
0,,Afghanistan,2020-09-20 04:22:56,33.93911,67.709953,38919,1437,32576,Afghanistan,99.976005
1,,Albania,2020-09-20 04:22:56,41.1533,20.1683,12226,358,6888,Albania,424.838418
2,,Algeria,2020-09-20 04:22:56,28.0339,1.6596,49623,1665,34923,Algeria,113.162645


In [42]:
location[["Country_Region","Last_Update", "Confirmed", "Deaths", "Recovered"]].isnull().values.any()

False

In [43]:
location[location.isnull().any(axis=1)].shape

(246, 10)

In [44]:
#https://www.w3resource.com/pandas/dataframe/dataframe-dropna.php
loc1 = location.dropna(subset = ["Province_State"])
loc1.head(3)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate
8,Australian Capital Territory,Australia,2020-09-20 04:22:56,-35.4735,149.0124,113,3,110,"Australian Capital Territory, Australia",26.395702
9,New South Wales,Australia,2020-09-20 04:22:56,-33.8688,151.2093,4200,53,2994,"New South Wales, Australia",51.736881
10,Northern Territory,Australia,2020-09-20 04:22:56,-12.4634,130.8456,33,0,31,"Northern Territory, Australia",13.436482


In [45]:
#https://stackoverflow.com/questions/14247586/how-to-select-rows-with-one-or-more-nulls-from-a-pandas-dataframe-without-listin
loc1[loc1.isnull().any(axis=1)]

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate
67,Diamond Princess,Canada,2020-09-20 04:22:56,,,0,1,0,"Diamond Princess, Canada",
68,Grand Princess,Canada,2020-09-20 04:22:56,,,13,0,13,"Grand Princess, Canada",
96,Unknown,Chile,2020-09-20 04:22:56,,,54,1,54,"Unknown, Chile",
220,Unknown,Germany,2020-09-20 04:22:56,,,2344,0,0,"Unknown, Germany",
266,Unknown,India,2020-09-20 04:22:56,,,0,0,0,"Unknown, India",
...,...,...,...,...,...,...,...,...,...,...
3742,Washington,US,2020-09-20 04:22:56,,,292,5,0,"Unassigned, Washington, US",
3796,West Virginia,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, West Virginia, US",
3866,Wisconsin,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, Wisconsin, US",
3898,Wyoming,US,2020-09-20 04:22:56,,,1,0,0,"Unassigned, Wyoming, US",


In [46]:
loc2 = key_generation(loc1, "Province_State", "Country_Region")
loc2.head(2)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate,key
8,Australian Capital Territory,Australia,2020-09-20 04:22:56,-35.4735,149.0124,113,3,110,"Australian Capital Territory, Australia",26.395702,"Australian Capital Territory, Australia"
9,New South Wales,Australia,2020-09-20 04:22:56,-33.8688,151.2093,4200,53,2994,"New South Wales, Australia",51.736881,"New South Wales, Australia"


In [47]:
loc2[["Incidence_Rate"]].isnull().values.any()

True

In [48]:
loc2["Incidence_Rate"] = missing_value_by_mean(loc2, "Incidence_Rate")

In [49]:
loc2[loc2.isnull().any(axis=1)]

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate,key
67,Diamond Princess,Canada,2020-09-20 04:22:56,,,0,1,0,"Diamond Princess, Canada",,"Diamond Princess, Canada"
68,Grand Princess,Canada,2020-09-20 04:22:56,,,13,0,13,"Grand Princess, Canada",,"Grand Princess, Canada"
96,Unknown,Chile,2020-09-20 04:22:56,,,54,1,54,"Unknown, Chile",,"Unknown, Chile"
220,Unknown,Germany,2020-09-20 04:22:56,,,2344,0,0,"Unknown, Germany",,"Unknown, Germany"
266,Unknown,India,2020-09-20 04:22:56,,,0,0,0,"Unknown, India",,"Unknown, India"
...,...,...,...,...,...,...,...,...,...,...,...
3742,Washington,US,2020-09-20 04:22:56,,,292,5,0,"Unassigned, Washington, US",1135.256154,"Washington, US"
3796,West Virginia,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, West Virginia, US",599.240468,"West Virginia, US"
3866,Wisconsin,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, Wisconsin, US",1172.414045,"Wisconsin, US"
3898,Wyoming,US,2020-09-20 04:22:56,,,1,0,0,"Unassigned, Wyoming, US",801.660985,"Wyoming, US"


In [50]:
loc2["Incidence_Rate"] = missing_value_by_country_mean(loc2, "Incidence_Rate")

In [51]:
loc2[loc2.isnull().any(axis=1)]

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate,key
67,Diamond Princess,Canada,2020-09-20 04:22:56,,,0,1,0,"Diamond Princess, Canada",181.242244,"Diamond Princess, Canada"
68,Grand Princess,Canada,2020-09-20 04:22:56,,,13,0,13,"Grand Princess, Canada",181.242244,"Grand Princess, Canada"
96,Unknown,Chile,2020-09-20 04:22:56,,,54,1,54,"Unknown, Chile",1928.356191,"Unknown, Chile"
220,Unknown,Germany,2020-09-20 04:22:56,,,2344,0,0,"Unknown, Germany",267.234195,"Unknown, Germany"
266,Unknown,India,2020-09-20 04:22:56,,,0,0,0,"Unknown, India",505.877622,"Unknown, India"
...,...,...,...,...,...,...,...,...,...,...,...
3742,Washington,US,2020-09-20 04:22:56,,,292,5,0,"Unassigned, Washington, US",1135.256154,"Washington, US"
3796,West Virginia,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, West Virginia, US",599.240468,"West Virginia, US"
3866,Wisconsin,US,2020-09-20 04:22:56,,,0,0,0,"Unassigned, Wisconsin, US",1172.414045,"Wisconsin, US"
3898,Wyoming,US,2020-09-20 04:22:56,,,1,0,0,"Unassigned, Wyoming, US",801.660985,"Wyoming, US"


In [53]:
loc2[["Incidence_Rate"]].isnull().values.any()

False

In [54]:
loc3 = loc2.dropna(subset = ["Lat", "Long_"])

In [55]:
loc3.head(5)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Combined_Key,Incidence_Rate,key
8,Australian Capital Territory,Australia,2020-09-20 04:22:56,-35.4735,149.0124,113,3,110,"Australian Capital Territory, Australia",26.395702,"Australian Capital Territory, Australia"
9,New South Wales,Australia,2020-09-20 04:22:56,-33.8688,151.2093,4200,53,2994,"New South Wales, Australia",51.736881,"New South Wales, Australia"
10,Northern Territory,Australia,2020-09-20 04:22:56,-12.4634,130.8456,33,0,31,"Northern Territory, Australia",13.436482,"Northern Territory, Australia"
11,Queensland,Australia,2020-09-20 04:22:56,-27.4698,153.0251,1152,6,1123,"Queensland, Australia",22.519793,"Queensland, Australia"
12,South Australia,Australia,2020-09-20 04:22:56,-34.9285,138.6007,466,4,462,"South Australia, Australia",26.530031,"South Australia, Australia"


In [56]:
def aggregate(df):
    
    '''
    This function aggregates groups by sums and means then perform column operations
    
    Input: dataframe
    Output: dataframe
    
    References:
    https://stackoverflow.com/questions/48909110/python-pandas-mean-and-sum-groupby-on-different-columns-at-the-same-time
    # https://stackoverflow.com/questions/20461165/how-to-convert-index-of-a-pandas-dataframe-into-a-column
    '''
    
    # aggregation of sums and means
    col_names = {'Lat':'latitude', 'Long_':'longitude','Confirmed':'confirmed_sum', 'Deaths':'death_sum', 'Recovered':'recovered_sum', 'Incidence_Rate':'incidence_rate_avg'}
    df = df.groupby('key').agg({'Lat':'mean', 'Long_':'mean', 'Confirmed':'sum', 'Deaths':'sum', 'Recovered':'sum', 'Incidence_Rate':'mean'}).rename(columns = col_names)
    
    # reset key index
    df.reset_index(level = 0, inplace = True)
    
    # column operations
    df["active_sum"] = df["confirmed_sum"] - df["death_sum"] - df["recovered_sum"]
    df["Case-Fatality_Ratio"] = df["death_sum"] / df["confirmed_sum"] * 100
    
    return df

In [57]:
loc4 = aggregate(loc3)
loc4

Unnamed: 0,key,latitude,longitude,confirmed_sum,death_sum,recovered_sum,incidence_rate_avg,active_sum,Case-Fatality_Ratio
0,"Abruzzo, Italy",42.351222,13.398438,4137,474,2984,315.421095,679,11.457578
1,"Acre, Brazil",-9.023800,-70.812000,27061,648,24298,3068.366716,2115,2.394590
2,"Adygea Republic, Russia",44.693901,40.152042,3775,31,3253,832.642222,491,0.821192
3,"Aguascalientes, Mexico",21.885300,-102.291600,6581,555,5783,458.722950,243,8.433369
4,"Aichi, Japan",35.035551,137.211621,5077,79,4554,67.225097,444,1.556037
...,...,...,...,...,...,...,...,...,...
555,"Zaporizhia Oblast, Ukraine",47.838800,35.139600,2829,42,1075,165.842437,1712,1.484624
556,"Zeeland, Netherlands",51.479360,3.861559,1210,72,0,315.524866,1138,5.950413
557,"Zhejiang, China",29.183200,120.093400,1282,1,1272,2.234617,9,0.078003
558,"Zhytomyr Oblast, Ukraine",50.254700,28.658700,4931,90,2379,404.116398,2462,1.825188


<br>

## miscellaneous

In [144]:
#location[location['Province_State'] == ' '].index
# https://stackoverflow.com/questions/27159189/find-empty-or-nan-entry-in-pandas-dataframe
t = pd.isna(location['Province_State'])

In [164]:
#location[['Province_State', 'Country_Region']].agg(','.join, axis=1)
#d = location.Country_RegionProvince_State.str.cat(location.Country_Region, sep=', ')
#d.head(10)

location['Province_State'].fillna('', inplace = True)
d = location.Province_State.str.cat(location.Country_Region, sep=', ')
d.head(10)

0                              , Afghanistan
1                                  , Albania
2                                  , Algeria
3                                  , Andorra
4                                   , Angola
5                      , Antigua and Barbuda
6                                , Argentina
7                                  , Armenia
8    Australian Capital Territory, Australia
9                 New South Wales, Australia
Name: Province_State, dtype: object

In [119]:
z = w["Combined_Key"].str.split(',', expand=True)

In [26]:
#train["province"].drop_duplicates()
location["Country_Region"]

0              Afghanistan
1                  Albania
2                  Algeria
3                  Andorra
4                   Angola
               ...        
3949    West Bank and Gaza
3950        Western Sahara
3951                 Yemen
3952                Zambia
3953              Zimbabwe
Name: Country_Region, Length: 3954, dtype: object

In [32]:
location["Country_Region"].value_counts()

US              3270
Russia            83
Japan             49
India             37
Colombia          33
                ... 
Brunei             1
Guinea             1
Saudi Arabia       1
Montenegro         1
Sri Lanka          1
Name: Country_Region, Length: 188, dtype: int64

In [68]:
x = location[location["Country_Region"] == "US"]

In [89]:
#x.head(50).count

In [69]:
x.to_csv('file.csv', index=False)

In [82]:
# https://queirozf.com/entries/pandas-dataframe-replace-examples
y = train[train["country"] == "United States"]
#y["country"]= y["country"].str.replace("United States", "US") 
#y["country"]= y["country"].replace("United States", "US",inplace=False)
#y
#y["country"] = y["country"].replace("United States", "US", inplace=True)

y.replace({'country':{"United States":"US"}})





#y["country"]= y["country"].str.replace("United States", "US", case = False)
#y
#y.loc[y.country == 'United States', 'bidderrate'] = 100

Unnamed: 0,age,sex,province,country,latitude,longitude,date_confirmation,additional_information,source,outcome
155,70-79,female,New Mexico,US,35.052112,-106.665680,21.03.2020,,https://cv.nmhealth.org/,nonhospitalized
300,56,male,Florida,US,26.152188,-80.487715,27.03.2020,,https://floridahealthcovid19.gov/,nonhospitalized
894,48,female,Florida,US,30.332953,-81.670940,27.03.2020,,https://floridahealthcovid19.gov/,nonhospitalized
895,56,female,Florida,US,27.471608,-82.300220,21.03.2020,,https://floridahealthcovid19.gov/,nonhospitalized
968,30-39,female,Nevada,US,36.219214,-115.018570,13.03.2020,,https://news3lv.com/news/local/9-new-coronavir...,nonhospitalized
...,...,...,...,...,...,...,...,...,...,...
367028,,,Maine,US,44.673122,-68.356636,19.03.2020,,https://www.maine.gov/dhhs/mecdc/infectious-di...,nonhospitalized
367037,43,male,Florida,US,28.717264,-81.236382,25.03.2020,,https://floridahealthcovid19.gov/,nonhospitalized
367271,,,North Dakota,US,46.977952,-100.468970,19.03.2020,,https://www.health.nd.gov/diseases-conditions/...,nonhospitalized
367403,68,male,Florida,US,28.769320,-81.709346,23.03.2020,,https://floridahealthcovid19.gov/,nonhospitalized


In [81]:
train['country'].to_csv('file.csv', index=False)

In [200]:
pd.get_dummies(train['sex'])

Unnamed: 0,female,male
0,0,0
1,0,0
2,0,0
3,1,0
4,0,0
...,...,...
367631,0,1
367632,0,0
367633,0,0
367634,1,0


In [201]:
pd.get_dummies(train[['province', 'country']])

Unnamed: 0,province_ABANCAY,province_ACOMAYO,province_AIJA,province_ANGARAES,province_ANTA,province_ANTABAMBA,province_ANTONIO RAIMONDI,province_ATALAYA,province_AYABACA,province_AYMARAES,...,country_Uganda,country_Ukraine,country_United Arab Emirates,country_United Kingdom,country_United States,country_Uruguay,country_Venezuela,country_Vietnam,country_Zambia,country_Zimbabwe
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
367631,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
367632,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
367633,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
367634,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
