In [1]:
# Import dependencies
import pandas as pd
import os
import glob
import numpy as np
import datetime 

In [2]:
# Use glob to get all the supply data csv files in the current working directory.
# Two safeguards are applied to the raw glob result:
#   * the combined export this notebook writes is a *.csv in the same
#     directory, so it is filtered out to keep a re-run from loading its own
#     output as if it were an input file;
#   * glob returns matches in arbitrary order, so the list is sorted to make
#     the load order (and the row order of the combined data) reproducible.
# NOTE: this name must stay in sync with the file name used by the export cell.
OUTPUT_CSV_NAME = "CAISO-ALL-SUPPLY-DATA.csv"

path = os.getcwd()
csv_files = sorted(
    csv_path
    for csv_path in glob.glob(os.path.join(path, "*.csv"))
    if os.path.basename(csv_path) != OUTPUT_CSV_NAME
)

In [3]:
# Start with an empty list that will hold one raw DataFrame per CSV file
dataframes_list = list()

In [4]:
# Load every CSV file in csv_files into its own DataFrame.
# The list is rebuilt from scratch here (instead of appending to a list
# created in another cell) so that re-running this cell cannot load the same
# files a second time and duplicate the data.
dataframes_list = [pd.read_csv(csv_path) for csv_path in csv_files]

In [5]:
# Check how many DataFrames were loaded into dataframes_list
# (one entry is appended per supply data CSV file read)
len(dataframes_list)

1446

In [6]:
# Start with an empty list that will collect the transformed DataFrames
transformed_dfs = list()

In [8]:
# Transform each DataFrame in dataframes_list and collect the results in transformed_dfs
def _reshape_supply_df(df):
    """Reshape one raw supply DataFrame into one row per original column.

    The label of the first column is used as the date string. That column is
    moved to the index and the frame is transposed, so its values become the
    output columns and the remaining column labels become the rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as loaded from a single CSV file.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding the transposed data
        plus three added columns: 'Date' (the first column's label), 'Time'
        (the original column label with ':00' appended) and 'DateTime'
        (still a string at this point: '<Date> <Time>').
    """
    # The first column's label doubles as the date for the whole frame
    date = df.columns[0]

    # Index by the first column's values and transpose
    reshaped = df.set_index(date).T

    reshaped["Date"] = date

    # After the transpose the index holds the original column labels
    reshaped["Time"] = reshaped.index
    reshaped = reshaped.reset_index(drop=True)

    # Cast 'Time' to string and append seconds
    reshaped["Time"] = reshaped["Time"].astype(str) + ":00"

    reshaped["DateTime"] = reshaped["Date"] + " " + reshaped["Time"]
    return reshaped


# Rebuild the result list inside this cell so that re-running the cell does
# not append a second copy of every transformed DataFrame.
transformed_dfs = []

# (first-column label, error message) for every DataFrame that was skipped
skipped_dfs = []

unix_epoch = datetime.datetime(1970, 1, 1)

for df in dataframes_list:
    reshaped = _reshape_supply_df(df)

    try:
        # Convert 'DateTime' column to datetime
        reshaped["DateTime"] = pd.to_datetime(reshaped["DateTime"])

        # Create 'Unix Timestamp' column: seconds elapsed since 1970-01-01
        reshaped["Unix Timestamp"] = (
            reshaped["DateTime"] - unix_epoch
        ).dt.total_seconds()
    except (ValueError, TypeError, OverflowError) as exc:
        # Best effort: a frame whose date/time strings cannot be converted is
        # left out, but it is recorded instead of being dropped silently.
        skipped_dfs.append((df.columns[0], str(exc)))
        continue

    transformed_dfs.append(reshaped)

print(
    f"Transformed {len(transformed_dfs)} of {len(dataframes_list)} DataFrames; "
    f"skipped {len(skipped_dfs)}"
)
for skipped_label, skipped_reason in skipped_dfs:
    print(f"  skipped {skipped_label!r}: {skipped_reason}")

In [9]:
# Start from an empty DataFrame that will hold the combined supply data
supply_df = pd.DataFrame(data=None)

In [10]:
# Combine every DataFrame in transformed_dfs into supply_df with one concat.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the whole accumulated frame on every
# iteration. A single pd.concat builds the same row-stacked result (fresh
# 0..n-1 index, union of columns) in one pass.
# pd.concat raises on an empty list, so fall back to an empty DataFrame.
supply_df = (
    pd.concat(transformed_dfs, ignore_index=True)
    if transformed_dfs
    else pd.DataFrame()
)

supply_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 412956 entries, 0 to 412955
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Renewables      412308 non-null  float64       
 1   Natural gas     412596 non-null  float64       
 2   Large hydro     412884 non-null  float64       
 3   Imports         412596 non-null  float64       
 4   Batteries       412884 non-null  float64       
 5   Nuclear         412884 non-null  float64       
 6   Coal            412884 non-null  float64       
 7   Other           412596 non-null  float64       
 8   Date            412956 non-null  object        
 9   Time            412956 non-null  object        
 10  DateTime        412956 non-null  datetime64[ns]
 11  Unix Timestamp  412956 non-null  float64       
dtypes: datetime64[ns](1), float64(9), object(2)
memory usage: 37.8+ MB


In [None]:
# Name of the combined CSV export. It is a relative path, so the file is
# resolved against the current working directory when it is written.
output_data_file = "CAISO-ALL-SUPPLY-DATA.csv"

# Write supply_df to that file, leaving the row index out of the CSV
supply_df.to_csv(path_or_buf=output_data_file, index=False)