In [2]:
# Load libraries
import pandas as pd
import sklearn_pandas
import numpy as np
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
import os # used to create necessary folders
import json
import datetime as dt
import logging
import time
import getpass
import requests
from requests.exceptions import HTTPError
import glob
import sys
from swat import *
import pickle
import zipfile
from math import sqrt
from scipy.stats import kendalltau
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import confusion_matrix                    
from sklearn.metrics import fbeta_score
from scipy.stats import ks_2samp

# --- CAS connection and notebook-wide settings ------------------------------
cas_host = 'sasserver.demo.sas.com'
cas_port = 5570

# Show full cell contents when frames are displayed. pandas >= 1.0 spells
# "no limit" as None (-1 is deprecated there and rejected by later releases);
# older releases only accept an integer, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Never keep the password in the notebook: take it from the environment, or
# prompt for it interactively when CAS_PASSWORD is not set.
# NOTE(review): a password was previously committed in this cell; rotate it.
cas_user = os.environ.get('CAS_USER', 'sasdemo')
cas_password = os.environ.get('CAS_PASSWORD') or getpass.getpass(
    'CAS password for {}: '.format(cas_user)
)

# Open the CAS session and make 'Public' the active caslib for later actions.
s = CAS(cas_host, cas_port, cas_user, cas_password)
s.sessionprop.setsessopt(caslib='Public')

# Base URL of the raw daily-report CSV files in the CSSE COVID-19 repository.
baseurl = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'


NOTE: 'Public' is now the active caslib.


In [3]:
# Folder scanned for the CSV files to combine.
path = r'/home/sasdemo/COVID_PROJECT/COVID_DATA/'  # use your path

# glob returns matches in arbitrary order; sorting keeps the row order of
# `frame` identical from one run to the next.
all_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not all_files:
    # pd.concat on an empty list fails with "No objects to concatenate",
    # which says nothing about the folder being empty or wrong.
    raise FileNotFoundError('No CSV files found in {!r}'.format(path))

# Read each file (first row is the header) and stack the results row-wise.
# ignore_index renumbers the rows; sort=False leaves the columns unsorted.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_files]
frame = pd.concat(li, axis=0, ignore_index=True, sort=False)


In [4]:
# Name of the CAS table to create and the caslib that receives it.
table_name = 'COVID_FILES'
LIB_OUT = 'PUBLIC'

# Step 1: send the pandas frame to CAS under the name/caslib above.
upload_target = {'name': table_name, 'caslib': LIB_OUT, 'replace': 'yes'}
s.upload_frame(frame, importoptions=None, casout=upload_target)

# Step 2: write the table out as a .sashdat file.
sashdat_name = table_name + '.sashdat'
s.table.save(table=table_name, name=sashdat_name, replace=True)

# Step 3: load that file back into the same table name (result is displayed).
s.table.loadTable(
    path=sashdat_name,
    casout={"name": table_name, "caslib": LIB_OUT, 'replace': True},
)


NOTE: Cloud Analytic Services made the uploaded file available as table COVID_FILES in caslib PUBLIC.
NOTE: The table COVID_FILES has been created in caslib PUBLIC from binary data uploaded to Cloud Analytic Services.
NOTE: Cloud Analytic Services saved the file COVID_FILES.sashdat in caslib Public.
NOTE: Cloud Analytic Services made the file COVID_FILES.sashdat available as table COVID_FILES in caslib PUBLIC.


In [29]:
# SAS DATA step, run on the CAS server, that rewrites PUBLIC.COVID_FILES in place.
# The program is assembled from one literal per statement purely for readability;
# the concatenated text is the same single-line program as before.
datastep_code = (
    'data PUBLIC.COVID_FILES;'
    'set PUBLIC.COVID_FILES;'
    # Parse the "Last Update" text into a datetime, then derive its date part.
    'DATE_UPDATE=input("Last Update"n,ANYDTDTM19.);'
    'Date=datepart(DATE_UPDATE);'
    'format DATE_UPDATE datetime. Date Date9.;'
    # When no province is given, reuse the country value as the province.
    'if "Province/State"n="" then "Province/State"n="Country/Region"n;'
    # Give the two name-literal columns plain identifiers.
    ' rename "Province/State"n=Province "Country/Region"n=Country;'
    'run;'
)
s.datastep.runcode(datastep_code)

Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,Public,COVID_FILES,5890,10,"CASTable('COVID_FILES', caslib='Public')"

Unnamed: 0,casLib,Name,Rows,Columns,Append,Promoted,casTable
0,Public,COVID_FILES,5890,10,,N,"CASTable('COVID_FILES', caslib='Public')"


In [38]:
# Make the fedSql action set available in this session.
s.builtins.loadActionSet("fedSql")

# COVID_ALL: per Country/Province/Date, the maximum of each counter.
create_all_sql = (
    'create table COVID_ALL{options replace=true} as '
    'select distinct Province,Country,Date,'
    'max(deaths) as Deaths,max(Recovered) as Recovered, max(Confirmed) as Confirmed '
    'from COVID_FILES group by Country,Province,Date '
)
s.fedSql.execDirect(query=create_all_sql)

# COVID_GEO: per Country/Province, the average coordinates over rows that
# have a longitude.
create_geo_sql = (
    'create table COVID_GEO{options replace=true} as '
    'select distinct Province,Country,avg(Latitude) as lat,avg(Longitude) as long '
    'from COVID_FILES  where longitude is not null '
    'group by Country,Province'
)
s.fedSql.execDirect(query=create_geo_sql)

# COVID_FINAL: COVID_ALL with the coordinates attached by a left join on
# Country and Province. Embedded newlines/indentation are kept as in the
# original query text.
create_final_sql = (
    'create table COVID_FINAL{options replace=true} as \n'
    '    select distinct t1.Province,t1.Country,t1.Date,t2.lat,t2.long,t1.Deaths,t1.Recovered,t1.Confirmed \n'
    '    from COVID_ALL t1 left join COVID_GEO t2 on t1.Country=t2.Country and t1.Province=t2.Province'
)
s.fedSql.execDirect(query=create_final_sql)


NOTE: Added action set 'fedSql'.
NOTE: CASDAL driver. Creation of a DATE column has been requested, but is not supported by the CASDAL driver. A DOUBLE PRECISION column will be created instead. A DATE format will be associated with the column.
NOTE: CASDAL driver. Creation of a DATE column has been requested, but is not supported by the CASDAL driver. A DOUBLE PRECISION column will be created instead. A DATE format will be associated with the column.
NOTE: Table COVID_ALL was created in caslib Public with 3556 rows returned.
NOTE: Table COVID_GEO was created in caslib Public with 454 rows returned.
NOTE: CASDAL driver. Creation of a DATE column has been requested, but is not supported by the CASDAL driver. A DOUBLE PRECISION column will be created instead. A DATE format will be associated with the column.
NOTE: CASDAL driver. Creation of a DATE column has been requested, but is not supported by the CASDAL driver. A DOUBLE PRECISION column will be created instead. A DATE format will be 

In [40]:
# Table to persist and the caslib it is reloaded into.
table_name = 'COVID_FINAL'
LIB_OUT = 'PUBLIC'

# Write the table to a .sashdat file, then load that file back under the
# same table name (the load result is the cell's displayed value).
sashdat_name = table_name + '.sashdat'
s.table.save(table=table_name, name=sashdat_name, replace=True)
s.table.loadTable(
    path=sashdat_name,
    casout={"name": table_name, "caslib": LIB_OUT, 'replace': True},
)

NOTE: Cloud Analytic Services saved the file COVID_FINAL.sashdat in caslib Public.
NOTE: Cloud Analytic Services made the file COVID_FINAL.sashdat available as table COVID_FINAL in caslib PUBLIC.


In [46]:

# Distinct COVID_FILES rows for Italy, ordered by date, for visual inspection.
# Newlines and indentation inside the query text are kept as in the original.
italy_sql = (
    "\n"
    "    select distinct t1.Province,t1.DATE_UPDATE,t1.Country,t1.Date,t1.Deaths,t1.Recovered,t1.Confirmed \n"
    "    from COVID_FILES t1 where t1.Country='Italy' order by date"
)
s.fedSql.execDirect(query=italy_sql)


Unnamed: 0,Province,DATE_UPDATE,Country,Date,Deaths,Recovered,Confirmed
0,Italy,2020-01-31 08:15:00,Italy,2020-01-31,0.0,0.0,2.0
1,Italy,2020-01-31 08:15:53,Italy,2020-01-31,0.0,0.0,2.0
2,Italy,2020-01-31 23:59:00,Italy,2020-01-31,,,2.0
3,Italy,2020-02-07 17:53:02,Italy,2020-02-07,0.0,0.0,3.0
4,Italy,2020-02-21 23:33:06,Italy,2020-02-21,1.0,0.0,20.0
5,Italy,2020-02-22 23:43:02,Italy,2020-02-22,2.0,1.0,62.0
6,Italy,2020-02-23 23:43:02,Italy,2020-02-23,3.0,2.0,155.0
7,Italy,2020-02-24 23:43:01,Italy,2020-02-24,7.0,1.0,229.0
8,Italy,2020-02-25 18:55:32,Italy,2020-02-25,10.0,1.0,322.0
9,Italy,2020-02-26 23:43:03,Italy,2020-02-26,12.0,3.0,453.0


In [5]:
# Read one daily report straight from disk and display it.
csv_name = '03-12-2020'
filename = ''.join(['/home/sasdemo/COVID_PROJECT/COVID_DATA/', csv_name, '.csv'])
table = pd.read_csv(filename)
table

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
0,Hubei,China,2020-03-12T09:53:06,67781,3056,50318,30.9756,112.2707
1,,Italy,2020-03-11T21:33:02,12462,827,1045,43.0000,12.0000
2,,Iran,2020-03-12T11:13:27,10075,429,2959,32.0000,53.0000
3,,"Korea, South",2020-03-12T05:13:02,7869,66,333,36.0000,128.0000
4,France,France,2020-03-11T22:53:03,2281,48,12,46.2276,2.2137
5,,Spain,2020-03-12T01:17:58,2277,55,183,40.0000,-4.0000
6,,Germany,2020-03-12T09:53:06,2078,3,25,51.0000,9.0000
7,Guangdong,China,2020-03-12T03:53:02,1356,8,1289,23.3417,113.4244
8,Henan,China,2020-03-11T08:13:09,1273,22,1249,33.8820,113.6140
9,Zhejiang,China,2020-03-12T01:33:02,1215,1,1197,29.1832,120.0934


In [30]:
# Read one daily report and show only the rows whose country is France.
csv_name = '03-12-2020'
filename = ''.join(['/home/sasdemo/COVID_PROJECT/COVID_DATA/', csv_name, '.csv'])
table = pd.read_csv(filename)

is_france = table['Country/Region'].eq('France')
table.loc[is_france]

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
4,France,France,2020-03-11T22:53:03,2281,48,12,46.2276,2.2137
173,St Martin,France,2020-03-11T20:53:02,2,0,0,18.0708,-63.0501
191,Saint Barthelemy,France,2020-03-11T20:53:02,1,0,0,17.9,-62.8333


In [27]:
# Rows of `table` whose 'Last Update' text contains the date 2020-03-13.
# regex=False: the date is a plain substring, not a pattern.
# na=False: a missing 'Last Update' counts as "no match"; without it the mask
# would hold NaN for such rows and boolean indexing would raise.
table[table['Last Update'].str.contains('2020-03-13', regex=False, na=False)]

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
0,Hubei,China,2020-03-13T11:09:03,67786,3062,51553,30.9756,112.2707
1,Guangdong,China,2020-03-13T11:09:03,1356,8,1296,23.3417,113.4244
4,Hunan,China,2020-03-13T11:09:03,1018,4,1005,27.6104,111.7088
7,Shandong,China,2020-03-13T11:09:03,760,7,739,36.3427,118.1498
8,Jiangsu,China,2020-03-13T11:09:03,631,0,630,32.9711,119.455
9,Chongqing,China,2020-03-13T23:13:12,576,6,566,30.0572,107.874
10,Sichuan,China,2020-03-13T11:09:03,539,3,503,30.6171,102.7103
11,Heilongjiang,China,2020-03-13T11:09:03,482,13,446,47.862,127.7615
12,Beijing,China,2020-03-13T11:09:03,436,8,342,40.1824,116.4142
13,Shanghai,China,2020-03-13T11:09:03,346,3,324,31.202,121.4491


In [23]:
# Order the combined frame by country, then by the 'Last Update' value, and
# display the rows for France.
# NOTE(review): the displayed 'Last Update' values are text in mixed formats,
# so this ordering looks lexicographic rather than chronological — confirm
# before relying on it.
frame_s = frame.sort_values(['Country/Region', 'Last Update'])
frame_s.loc[frame_s['Country/Region'].eq('France')]

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
124,,France,1/24/20 17:00,2.0,,,,
165,,France,1/25/20 17:00,3.0,,,,
211,,France,1/26/20 16:00,3.0,,,,
258,,France,1/27/20 23:59,3.0,,,,
309,,France,1/28/20 23:00,4.0,,,,
361,,France,1/29/20 19:30,5.0,,,,
416,,France,1/30/20 16:00,5.0,,,,
470,,France,1/31/2020 23:59,5.0,,,,
532,,France,2/1/2020 1:52,6.0,0.0,0.0,,
599,,France,2020-02-01T01:52:40,6.0,0.0,0.0,,


In [14]:
# Folder scanned for the CSV files to combine.
path = r'/home/sasdemo/COVID_PROJECT/COVID_DATA/'  # use your path

# glob returns matches in arbitrary order; sorting keeps the row order of
# `frame` identical from one run to the next.
all_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not all_files:
    # pd.concat on an empty list fails with "No objects to concatenate",
    # which says nothing about the folder being empty or wrong.
    raise FileNotFoundError('No CSV files found in {!r}'.format(path))

# Read each file (first row is the header) and stack the results row-wise.
# ignore_index renumbers the rows; sort=False leaves the columns unsorted.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_files]
frame = pd.concat(li, axis=0, ignore_index=True, sort=False)


In [12]:
# Hand-picked list of daily report files (03-11-2020 through 03-15-2020),
# built from the shared folder prefix and each report's file stem.
all_f = [
    '/home/sasdemo/COVID_PROJECT/COVID_DATA/' + report_day + '.csv'
    for report_day in (
        '03-11-2020',
        '03-12-2020',
        '03-13-2020',
        '03-14-2020',
        '03-15-2020',
    )
]

In [13]:
# Read every file listed in `all_f` (first row is the header), in list order.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_f]

# Stack the per-file frames row-wise into one frame with a fresh 0..n-1 index,
# leaving the columns unsorted, and display the result.
frame = pd.concat(li, axis=0, ignore_index=True, sort=False)
frame

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
0,Hubei,China,2020-03-11T10:53:02,67773,3046,49134,30.9756,112.2707
1,,Italy,2020-03-11T21:33:02,12462,827,1045,43.0000,12.0000
2,,Iran,2020-03-11T18:52:03,9000,354,2959,32.0000,53.0000
3,,"Korea, South",2020-03-11T21:13:18,7755,60,288,36.0000,128.0000
4,France,France,2020-03-11T22:53:03,2281,48,12,46.2276,2.2137
5,,Spain,2020-03-11T20:53:02,2277,54,183,40.0000,-4.0000
6,,Germany,2020-03-11T19:13:17,1908,3,25,51.0000,9.0000
7,Guangdong,China,2020-03-11T10:13:06,1356,8,1282,23.3417,113.4244
8,Henan,China,2020-03-11T08:13:09,1273,22,1249,33.8820,113.6140
9,Zhejiang,China,2020-03-11T09:33:12,1215,1,1195,29.1832,120.0934
