# Time Series Analysis of July 2006 Heat Wave

This notebook processes and makes plots of the temporal evolution of meteorology during the course of the July 2006 heat wave in the western U.S.

In [47]:
# Start by importing required packages and information about your operating system:
import os 

import pandas as pd

from glob import glob


## Set the Directory Structure

In [2]:
# Identify the top-level data input and output directories:
data_input_dir =  '/Volumes/LaCie/wrf_to_tell/wrf_tell_counties_output'
data_output_dir =  '/Users/burl878/Documents/code_repos/ntp_heat_wave/data'

# If the "data_output_dir" subdirectory doesn't exist then create it:
if not os.path.exists(data_output_dir):
   os.makedirs(data_output_dir)


## Process the Population-Weighted Time Series:

In [40]:
# Define a function to convert from Kelvin to Fahrenheit:
def kelvin_to_farenheight(temperature_K: int):
    temperature_F = (1.8*(temperature_K - 273) + 32).round(2)
    return temperature_F


In [50]:
# Set the year and scenario to process:
year_to_process = 2006
scenario_to_process = 'historic'

# Define function to process the population-weighted time series for a given year and scenario:
def process_met_time_series(data_input_dir: str, year_to_process: int, scenario_to_process: str):
    
    # Read in the historical population file:
    pop_df = pd.read_csv(os.path.join(data_output_dir, 'county_populations_2000_to_2020.csv'))
    
    # Rename some columns:
    pop_df.rename(columns={'pop_2006': 'Population','state_name': 'State', 'county_name': 'County', 'county_FIPS': 'FIPS'}, inplace=True)
    
    # Only keep the columns that are needed:
    pop_df = pop_df[['State', 'County', 'FIPS', 'Population']].copy()
    
    # Set the data input directory:
    met_data_dir = os.path.join(data_input_dir, scenario_to_process, str(year_to_process))

    
    filelist = glob(met_data_dir, '*.csv')
    
    
    # Set a filename for testing:
    file = '2006_01_01_00_UTC_County_Mean_Meteorology.csv'
    
    # Extract the time string from the name of the file:
    file_time = file.replace("_UTC_County_Mean_Meteorology.csv", "")

    # Read in the .csv file:
    met_df = pd.read_csv(os.path.join(met_data_dir, file))

    # Only keep the columns that are needed:
    met_df = met_df[['FIPS', 'T2']].copy()
    
    # Merge the dataframes based on county FIPS code :
    df_combined = pd.merge(met_df, pop_df, how='left', on=['FIPS'])
    
    # Subset to only counties in the WECC:
    wecc_df = df_combined.loc[df_combined['State'].isin(['Washington', 'Oregon', 'California', 'Montana', 'Idaho', 'Nevada', 'Arizona', 'New Mexico', 'Colorado', 'Utah', 'Wyoming'])].copy()
    
    # Calculate the total population in the WECC:
    wecc_total_population = wecc_df['Population'].sum()
    
    # Calculate the fraction of the total population that lives in each county:
    wecc_df['Population_Fraction'] = wecc_df['Population'] / wecc_total_population
    
    # Calculate population-weighted mean temperature in the WECC:
    wecc_mean_temperature = (wecc_df['T2'] * wecc_df['Population_Fraction']).sum().round(2)
    
    # Subset to only counties in California:
    ca_df = df_combined.loc[df_combined['State'].isin(['California'])].copy()
    
    # Calculate the total population in the California:
    ca_total_population = ca_df['Population'].sum()
    
    # Calculate the fraction of the total population that lives in each county:
    ca_df['Population_Fraction'] = ca_df['Population'] / ca_total_population
    
    # Calculate population-weighted mean temperature in the California:
    ca_mean_temperature = (ca_df['T2'] * ca_df['Population_Fraction']).sum().round(2)
    
    # Calculate the total population in the U.S.:
    us_total_population = df_combined['Population'].sum()
    
    # Calculate the fraction of the total population that lives in each county:
    df_combined['Population_Fraction'] = df_combined['Population'] / us_total_population
    
    # Calculate population-weighted mean temperature in the U.S.:
    us_mean_temperature = (df_combined['T2'] * df_combined['Population_Fraction']).sum().round(2)
    
    # Subset to only Los Angeles county:
    la_df = df_combined.loc[df_combined['FIPS'] == 6037].copy()
    
    # Extract the temperature value for Los Angeles county:
    la_mean_temperature = la_df['T2'].sum().round(2)
    
    # Initiate an empty dataframe and counter to store the results:
    output_df = pd.DataFrame()
    output_df.loc[0, 'Time_UTC'] = pd.to_datetime(file_time, exact=False, format='%Y_%m_%d_%H')
    output_df.loc[0, 'US_T2_F'] = kelvin_to_farenheight(us_mean_temperature)
    output_df.loc[0, 'WECC_T2_F'] = kelvin_to_farenheight(wecc_mean_temperature)
    output_df.loc[0, 'CA_T2_F'] = kelvin_to_farenheight(ca_mean_temperature)
    output_df.loc[0, 'LA_T2_F'] = kelvin_to_farenheight(la_mean_temperature)
    
    return filelist
    
a = process_met_time_series(data_input_dir = data_input_dir, year_to_process = year_to_process, scenario_to_process = scenario_to_process)

a


TypeError: glob() takes 1 positional argument but 2 were given