In [1]:
import urllib.request as url
import pandas as pd
import matplotlib.pyplot as mpl
import sys


# Tupple(csvFilename,dateColumnName,location)
casesBySeverity = ("deaths-and-active-cases-in-icu-requiring-oxygen-supplementation-or-hospitalised.csv","day_of_as_of_date","%Y/%m/%d","NA")
casesTotal = ("owid-covid-data.csv","date","%Y/%m/%d","Singapore")


# input: dataframe
# return: reconstructed df
def restructureDataFrame(data):
    currentSevereCases = data.groupby(['day_of_as_of_date','clinical_status']).sum()
    pivotTable = pd.pivot_table(currentSevereCases,values="count_of_case",index=["day_of_as_of_date"],columns=["clinical_status"])
#     pivotTable["TotalSevereCases"] = pivotTable.sum(axis=1)
    return pivotTable


# input: csv file
# return: df sorted by date
def readCsv(dSet):
    dataSet = pd.read_csv(dSet[0])
    if dSet[3] == "Singapore":
        dataSet = dataSet[dataSet.location.eq("Singapore")]
    dataSet[dSet[1]] = pd.to_datetime(dataSet[dSet[1]],format=dSet[2]).dt.strftime("%Y-%m-%d")
    dfByDate = dataSet.sort_values(by=[dSet[1]])
    return dfByDate


# input: dataframe(index=datetime),month(int),year(int)
# output: sortedDf
def sortByDate(dataFrame,month,year):
    try:
        if month == 0.00:
            startdate = pd.to_datetime("%s-01-1"%year).date()
            enddate = pd.to_datetime("%s-12-1"%year).date()
            enddate = pd.Period(enddate,freq='M').end_time.date()
            dataFrame = dataFrame.loc[startdate:enddate]
            dataFrame = dataFrame.groupby([lambda x: x.year, lambda x: x.month]).sum()
#             totalSevere = dataFrame["DailyICU"]+ dataFrame["DailyOxygen"]
#             dataFrame["SeverityRate(%)"] = (totalSevere/dataFrame["DailyCases"])*100
#             dataFrame = dataFrame.round(2)
        else:
            startdate = pd.to_datetime('%s-%s-1'%(year,month)).date()
            enddate = pd.to_datetime('%s-%s-1'%(year,month)).date()
            enddate = pd.Period(enddate,freq='M').end_time.date()
            dataFrame = dataFrame.loc[startdate:enddate]
#             dataFrame = dataFrame.round(2)
    except:
        print("Please Input correct year")
        sys.exit()
    return dataFrame


def reCalculateRate(dataFrame,total,columns,newcolumn):
    df = dataFrame.copy()
    columnTotal = df[columns[0]]
    for column in columns[1:len(columns)]:
#             print(column)
        columnTotal +=  df[column]
    dataFrame[newcolumn] = (columnTotal/dataFrame[total])*100
    dataFrame = dataFrame.round(2)
    return dataFrame

        
# calculate the Daily Severe Cases from current severe cases
# input: dataframe, new Column Name, target Column name
# return: Null
def calculateDailySevereCases(dataframe,newColumn,targetColumn):
    dataframe[newColumn] = dataframe[targetColumn].diff()
    dataframe[newColumn] = dataframe.apply(lambda x: x[newColumn] if x[newColumn]>0 else 0, axis=1) 

    
# input: df1, df2
# Return: merged Df
def merge2Table(df1,df2):
    mergeTable = df1.join(df2)
#     mergeTable = dailyCasesDF.join(currentSevereCasesDF)
    mergeTable.index = pd.to_datetime(mergeTable.index)
    mergeTable.index = mergeTable.index.date
    return mergeTable


def calculateSeverityRate(mergeTable):
    mergeTable.columns = mergeTable.columns.str.replace(' ', '')
    calculateDailySevereCases(mergeTable,"DailyHospitalised","Hospitalised")
    calculateDailySevereCases(mergeTable,"DailyICU","InIntensiveCareUnit")
    calculateDailySevereCases(mergeTable,"DailyOxygen","RequiresOxygenSupplementation")
    totalSevere = mergeTable["DailyICU"]+ mergeTable["DailyOxygen"]
    mergeTable["SeverityRate(%)"] = (totalSevere/mergeTable["DailyCases"])*100
    mergeTable = mergeTable.round(2)
    return mergeTable


# input: dataFrame
# return: null
def plotGraph(dataFrame):
    ax = dataFrame[['Hospitalised','InIntensiveCareUnit','RequiresOxygenSupplementation']].plot(kind='bar', title="Total Hospitalisation", figsize=(15, 10),legend=True, fontsize=9,stacked=True)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("Cases", fontsize=12)
#     mpl.show()


pd.set_option("display.max_rows", None, "display.max_columns", None)
dailyCasesDF = readCsv(casesTotal)[['date','new_cases']].set_index('date').rename(columns={"new_cases":"DailyCases"})
currentSevereCasesDF = restructureDataFrame(readCsv(casesBySeverity))
mergeTable = merge2Table(dailyCasesDF,currentSevereCasesDF)

# Import this file call the variable below to get the table
severityRateDF = calculateSeverityRate(mergeTable)
# plotGraph(sortByDate(severityRateDF,10,2021))

In [29]:
dataFrame = sortByDate(severityRateDF,10,2021)
# dataFrame = reCalculateRate(dataFrame,"DailyCases",["DailyICU","DailyOxygen"],"SeverityRate(%)")["SeverityRate(%)"]
ax = dataFrame[["DailyICU","DailyOxygen"]]
# .plot(kind='barh', title="Daily Severe Cases", figsize=(15, 10),legend=True, fontsize=9,stacked=True,color=["orange","cyan"])
# ax.set_xlabel("Cases", fontsize=12)
# ax.set_ylabel("Date", fontsize=12)
ax

Unnamed: 0,DailyICU,DailyOxygen
2021-10-01,0.0,18.0
2021-10-02,0.0,21.0
2021-10-03,3.0,7.0
2021-10-04,1.0,0.0
2021-10-05,0.0,21.0
2021-10-06,3.0,8.0
2021-10-07,3.0,42.0
2021-10-08,1.0,10.0
2021-10-09,0.0,0.0
2021-10-10,1.0,0.0


In [4]:
sortByDate(severityRateDF,0,2021)
print(reCalculateRate(sortByDate(severityRateDF,0,2021),"DailyCases",["DailyICU","DailyOxygen"]))


DailyOxygen
         DailyCases  Deceased  Hospitalised  InIntensiveCareUnit  \
2021 1        937.0       0.0           0.0                  0.0   
     2        400.0       0.0           0.0                  0.0   
     3        445.0       0.0           0.0                  0.0   
     4        764.0       0.0           0.0                  0.0   
     5        906.0       0.0           0.0                  0.0   
     6        528.0       0.0           0.0                  0.0   
     7       2402.0       0.0           0.0                  0.0   
     8       2639.0       0.0           0.0                  0.0   
     9      28901.0      33.0        8171.0                228.0   
     10     62066.0     163.0       24246.0                924.0   

         RequiresOxygenSupplementation  DailyHospitalised  DailyICU  \
2021 1                             0.0                0.0       0.0   
     2                             0.0                0.0       0.0   
     3                    

In [7]:
severityRateDF

Unnamed: 0,DailyCases,Deceased,Hospitalised,InIntensiveCareUnit,RequiresOxygenSupplementation,DailyHospitalised,DailyICU,DailyOxygen,SeverityRate(%)
2020-01-23,1.0,,,,,0.0,0.0,0.0,0.0
2020-01-24,2.0,,,,,0.0,0.0,0.0,0.0
2020-01-25,0.0,,,,,0.0,0.0,0.0,
2020-01-26,1.0,,,,,0.0,0.0,0.0,0.0
2020-01-27,1.0,,,,,0.0,0.0,0.0,0.0
2020-01-28,2.0,,,,,0.0,0.0,0.0,0.0
2020-01-29,0.0,,,,,0.0,0.0,0.0,
2020-01-30,3.0,,,,,0.0,0.0,0.0,0.0
2020-01-31,3.0,,,,,0.0,0.0,0.0,0.0
2020-02-01,3.0,,,,,0.0,0.0,0.0,0.0


In [8]:
dataFrame = sortByDate(severityRateDF,10,2021)
reCalculateRate(dataFrame,"DailyCases",["DailyICU","DailyOxygen"],"SeverityRate(%)")["SeverityRate(%)"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[newcolumn] = (columnTotal/dataFrame[total])*100


2021-10-01    0.62
2021-10-02    0.89
2021-10-03    0.49
2021-10-04    0.04
2021-10-05    0.60
2021-10-06    0.31
2021-10-07    1.29
2021-10-08    0.31
2021-10-09    0.00
2021-10-10    0.04
2021-10-11    0.75
2021-10-12    0.00
2021-10-13    0.41
2021-10-14    0.34
2021-10-15    0.41
2021-10-16    0.42
2021-10-17    0.69
2021-10-18    0.43
2021-10-19    0.13
2021-10-20    0.00
Name: SeverityRate(%), dtype: float64