# In [None]:
import pandas as pd
import json

# Function to extract data from a tabular file (CSV or Parquet format)
# If the file extension is invalid, it raises an exception with a warning message.

def extract_tabular_data(file_path: str):
    """Read a CSV or Parquet file into a pandas DataFrame.

    The reader is chosen from the suffix of ``file_path``: ``.csv`` is read
    with ``pd.read_csv`` and ``.parquet`` with ``pd.read_parquet``.

    Args:
        file_path: Path of the file to read; must end in ``.csv`` or
            ``.parquet``.

    Returns:
        The DataFrame produced by the matching pandas reader.

    Raises:
        Exception: If ``file_path`` ends with neither supported suffix.
    """
    # Reject unsupported extensions first so the happy path stays flat.
    if not file_path.endswith((".csv", ".parquet")):
        raise Exception("Warning: Invalid file extension. Please try with .csv or .parquet!")

    reader = pd.read_csv if file_path.endswith(".csv") else pd.read_parquet
    return reader(file_path)


# Function to extract and flatten data from a JSON file
# Converts nested JSON data into a pandas DataFrame using json_normalize().

# extract_json_data() takes a single parameter, file_path: the path of the JSON file to read.
def extract_json_data(file_path: str) -> pd.DataFrame:
    """Extract and flatten data from a JSON file.

    The file is parsed with the ``json`` library and the result is passed to
    ``pd.json_normalize``, which flattens nested objects into columns.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A DataFrame built by ``pd.json_normalize`` from the parsed JSON.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the platform's locale default (e.g. cp1252 on Windows).
    with open(file_path, "r", encoding="utf-8") as json_file:
        raw_data = json.load(json_file)

    return pd.json_normalize(raw_data)

  
# Function to transform electricity sales data
# Drops records with a missing price, keeps only the residential and transportation sectors, extracts year/month from the period, and selects the output columns.

def transform_electricity_sales_data(raw_data: pd.DataFrame) -> pd.DataFrame:
    """Clean electricity sales records for the residential and transportation sectors.

    Steps, in order:
      1. Drop rows whose ``price`` is missing.
      2. Keep only rows whose ``sectorName`` is "residential" or
         "transportation".
      3. Derive ``year`` (characters 0-3 of ``period``) and ``month``
         (characters 5 onward of ``period``).
      4. Keep only the columns ``year``, ``month``, ``stateid``, ``price``
         and ``price-units``.

    No aggregation is performed; one output row corresponds to one kept
    input row. The input frame is left unmodified.

    Args:
        raw_data: Frame containing at least the columns ``period``,
            ``sectorName``, ``stateid``, ``price`` and ``price-units``.
            ``period`` is sliced as text (presumably "YYYY-MM" -- confirm
            against the data source).

    Returns:
        A new DataFrame with columns ``year``, ``month``, ``stateid``,
        ``price`` and ``price-units``, retaining the original row index.

    Raises:
        KeyError: If a required column is missing from ``raw_data``.
    """
    sectors_to_keep = ["residential", "transportation"]

    # Use the non-inplace dropna so the caller's DataFrame is never mutated.
    priced = raw_data.dropna(subset=["price"])

    # Only keep residential and transportation records.
    selected = priced.loc[priced["sectorName"].isin(sectors_to_keep)]

    # assign() returns a new frame, which avoids writing into a slice of
    # another frame (the source of pandas' SettingWithCopyWarning).
    period_text = selected["period"].str
    with_dates = selected.assign(year=period_text[0:4], month=period_text[5:])

    # Only keep the columns year, month, stateid, price and price-units.
    return with_dates.loc[:, ["year", "month", "stateid", "price", "price-units"]]

  
# Function to load a DataFrame to a file (CSV or Parquet format)
# Raises an exception if the file extension is invalid.  
 
def load(dataframe: pd.DataFrame, file_path: str):
    """Write a DataFrame to disk as CSV or Parquet, chosen by file extension.

    Args:
        dataframe: The frame to persist.
        file_path: Destination path; must end in ``.csv`` or ``.parquet``.

    Raises:
        Exception: If ``file_path`` ends with neither supported suffix.
    """
    # Bail out early on anything that is not a supported target format.
    if not file_path.endswith((".csv", ".parquet")):
        raise Exception(f"Warning: {file_path} is not a valid file type. Please try again!")

    if file_path.endswith(".csv"):
        dataframe.to_csv(file_path)
    else:
        dataframe.to_parquet(file_path)