In [None]:
import os
import requests
import json
import time
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from IPython.display import clear_output
from IPython.display import display
import ipywidgets as wdg

# IPython magic: draw matplotlib figures inline in the notebook output
%matplotlib inline
# Set the default figure resolution to 100 dpi (intended to make figures larger)
plt.rcParams['figure.dpi'] = 100


########## Click the button to update the data and plot ##########
# Output widget used as the display area: the button callback runs inside
# `with out:` so that what it prints and displays is captured here
from ipywidgets import Output, Button
out = Output()
# Set button function
def access_api(button):
    """Button callback: download fresh data, rebuild the time series, show the graph.

    Downloads every dataset listed in ``_DATASETS`` from the UKHSA dashboard
    API, saves each raw response list as JSON, merges them into one daily
    DataFrame, pickles that frame and finally displays the interactive
    controls and graph. Everything printed or displayed goes to the ``out``
    widget.

    Args:
        button: the widget that was clicked (passed in by ``Button.on_click``).
            ``None`` is accepted for manual calls; the module-level
            ``apibutton`` is used in that case.

    On success the button gets a "check" icon and is disabled to prevent
    repeated downloads. On a network/HTTP failure, or when the API returns
    no data, a message is printed, the icon becomes "unlink" and the button
    stays enabled so the user can retry.
    """
    if button is None:
        button = apibutton  # keep manual calls such as access_api(None) working
    # Drop whatever a previous (failed) attempt left in the output area;
    # wait=True delays the clearing until new output arrives (less flicker).
    out.clear_output(wait=True)
    with out:
        try:
            datasets = [_download_metric(metric, json_path)
                        for _column, metric, json_path in _DATASETS]
        except requests.RequestException as err:
            print(f"Could not download data from the API: {err}")
            button.icon = "unlink"
            return

        timeseriesdf = _build_timeseries(datasets)
        if timeseriesdf.empty:
            print("The API returned no data points; nothing to plot")
            button.icon = "unlink"
            return

        # Keep a copy of the processed data on disk
        timeseriesdf.to_pickle("timeseriesdf_cases_test.pkl")

        _show_dashboard(timeseriesdf)

        # Update button state
        button.icon = "check"    # indicate that the task is complete
        button.disabled = True   # prevent repeated clicks


# Structure parameters shared by every query issued from this cell
_STRUCTURE = {
    "theme": "infectious_disease",
    "sub_theme": "respiratory",
    "topic": "COVID-19",
    "geography_type": "Nation",
    "geography": "England",
}

# (DataFrame column, API metric name, file the raw JSON is saved to)
_DATASETS = (
    ("Testing", "COVID-19_testing_PCRcountByDay", "data_testing.json"),
    ("Cases", "COVID-19_cases_casesByDay", "data_cases.json"),
)


class _APIwrapper:
    """Paginated, rate-limited access to one metric of the UKHSA dashboard API."""

    _access_point = "https://api.ukhsa-dashboard.data.gov.uk"
    _last_access = 0.0    # time of the last API access, shared by all instances
    _min_interval = 0.33  # seconds between requests (max 3 requests/second)
    _timeout = 30         # seconds to wait for the server before giving up

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """Build the starting endpoint from the structure parameters."""
        url_path = (f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/"
                    f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url = _APIwrapper._access_point + url_path
        self._next_url = self._start_url
        # None / -1 never match a real request, so the first get_page call
        # always (re)starts from the first page
        self._filters = None
        self._page_size = -1
        # will contain the number of items available for the query
        self.count = None

    def get_page(self, filters=None, page_size=5):
        """Download the next page of data and return its list of results.

        Sets the ``count`` attribute to the total number of items reported by
        the API for this query. Changing ``filters`` or ``page_size`` between
        calls restarts from the first page. Requests are spaced at least
        ``_min_interval`` seconds apart.

        Args:
            filters: optional dict of query filters; entries whose value is
                ``None`` are not sent.
            page_size: number of data points per response page (maximum 365).

        Returns:
            The page's ``results`` list, or ``[]`` once the last page has
            already been fetched.

        Raises:
            ValueError: if ``page_size`` exceeds 365.
            requests.RequestException: on connection problems, timeouts or an
                HTTP error status.
        """
        if page_size > 365:
            raise ValueError("Max supported page size is 365")
        # Work on a private copy so that later in-place changes made by the
        # caller to its dict are still detected as "filters changed"
        filters = {} if filters is None else dict(filters)
        # restart from the first page if page size or filters have changed
        if filters != self._filters or page_size != self._page_size:
            self._filters = filters
            self._page_size = page_size
            self._next_url = self._start_url
        # signal the end-of-data condition
        if self._next_url is None:
            return []  # we already fetched the last page
        # simple rate limiting to avoid bans
        pause = self._min_interval - (time.time() - _APIwrapper._last_access)
        if pause > 0:
            time.sleep(pause)
        # record the moment the request is actually sent (i.e. after any
        # sleep), otherwise the spacing to the next request would be too short
        _APIwrapper._last_access = time.time()
        # drop the None-valued filters and add the page size; the page number
        # is already part of _next_url
        parameters = {key: value for key, value in filters.items() if value is not None}
        parameters['page_size'] = page_size
        response = requests.get(self._next_url, params=parameters, timeout=self._timeout)
        response.raise_for_status()  # fail clearly instead of with a KeyError below
        payload = response.json()
        # remember where the next page is (None after the last page)
        self._next_url = payload['next']
        self.count = payload['count']
        # data are in the nested 'results' list
        return payload['results']

    def get_all_pages(self, filters=None, page_size=365):
        """Download all remaining pages for the query and return the combined list.

        Uses ``get_page`` for each request, so the same rate limiting,
        ``count`` update and exceptions apply. ``page_size`` trades the time
        needed to load one page against the number of pages.
        """
        data = []
        while True:
            next_page = self.get_page(filters, page_size)
            if not next_page:
                break  # we are done
            data.extend(next_page)
        return data


def _download_metric(metric, json_path):
    """Fetch every data point of `metric`, save the list as JSON and return it."""
    api = _APIwrapper(**_STRUCTURE, metric=metric)
    records = api.get_all_pages()
    with open(json_path, "wt") as outf:
        json.dump(records, outf)
    return records


def _build_timeseries(datasets):
    """Merge API result lists into a daily DataFrame with one column per dataset.

    The index covers every day from the earliest to the latest date found;
    missing values (and missing days) are filled with 0.0. Returns an empty
    frame when the datasets contain no entries at all.
    """
    # Collect the values per date: {date string: {metric name: value}}
    by_date = {}
    for dataset in datasets:
        for entry in dataset:
            by_date.setdefault(entry['date'], {})[entry['metric']] = entry['metric_value']

    if not by_date:
        columns = [column for column, _metric, _path in _DATASETS]
        return pd.DataFrame(columns=columns, dtype=float)

    # dtype=float turns absent/None values into NaN and keeps the columns
    # numeric, which plotting requires
    frame = pd.DataFrame({
        column: pd.Series(
            {date: values.get(metric) for date, values in by_date.items()},
            dtype=float,
        )
        for column, metric, _path in _DATASETS
    })
    frame.index = pd.to_datetime(frame.index, format="%Y-%m-%d")
    # consecutive days from the first to the last date, so gaps become rows too
    every_day = pd.date_range(frame.index.min(), frame.index.max(), freq='D')
    return frame.reindex(every_day).fillna(0.0)


def _show_dashboard(timeseriesdf):
    """Display the year/type/scale controls and the graph they drive."""
    # Multiple-selection list of data categories; all selected by default
    series = wdg.SelectMultiple(
        options=list(timeseriesdf.columns),
        value=list(timeseriesdf.columns),
        rows=len(timeseriesdf.columns),
        description='Type:',
        disabled=False
    )

    # Linear or logarithmic y axis
    scale = wdg.RadioButtons(
        options=['linear', 'log'],
        description='Scale:',
        disabled=False
    )

    # Years present in the data, as plain ints
    unique_years = [int(year) for year in timeseriesdf.index.year.unique()]
    year_selector = wdg.SelectMultiple(
        options=unique_years,
        value=(unique_years[0],),  # the first year is selected by default
        description='Year:',
        rows=5,
        disabled=False
    )

    # Lay the controls out side by side
    controls = wdg.HBox([year_selector, series, scale])

    def timeseries_graph(gcols, gscale, selected_year):
        """Plot the selected columns for the selected years on the chosen scale."""
        if not gcols:
            print("Click to select data for graph")
            print("(CTRL-Click to select more than one category)")
            return
        if not selected_year:
            print("Click to select a year for graph")
            print("(CTRL-Click to select more than one year)")
            return
        in_selected_years = timeseriesdf.index.year.isin(list(selected_year))
        timeseriesdf.loc[in_selected_years, list(gcols)].plot(logy=(gscale == 'log'))
        years = ', '.join(map(str, selected_year))
        plt.title(f"Time Series of Selected Types ({years})")
        plt.ylabel("Value")
        plt.xlabel("Date")
        plt.show()  # important - graphs won't update if this is missing

    # Re-run timeseries_graph whenever one of the controls changes
    graph = wdg.interactive_output(
        timeseries_graph,
        {'gcols': series, 'gscale': scale, 'selected_year': year_selector},
    )

    display(controls, graph)

# Button that triggers the data refresh (see the ipywidgets Button
# documentation for the available parameters)
apibutton = wdg.Button(
    icon='download',  # FontAwesome name, without the `fa-` prefix
    tooltip='Click to download current data',
    description='Refresh data',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# run access_api every time the button is clicked
apibutton.on_click(access_api)

# show the button together with the output area
display(apibutton, out)


Button(description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click to download current da…

Output()