In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# Read all files of one supported format from a directory into separate dataframes
def read_csv_files(directory, format='csv'):
    """
    Read every file with the given extension from a directory into its own dataframe.

    Parameters:
        directory (str): The directory containing the files
        format (str): Extension of the files to read, 'csv' or 'xlsx'. Default is 'csv'
    Returns (list): List of dataframes, one per matching file, ordered by file name
    Raises:
        ValueError: If format is neither 'csv' nor 'xlsx'
    """
    # Map each supported extension to the pandas reader that handles it
    readers = {'csv': pd.read_csv, 'xlsx': pd.read_excel}
    # Validate up front so an unsupported format fails before touching the file system
    if format not in readers:
        raise ValueError('Invalid file format. Only csv and xlsx files are supported')
    reader = readers[format]
    suffix = '.{}'.format(format)
    # os.listdir returns entries in arbitrary order; sort so the result is reproducible
    files = sorted(f for f in os.listdir(directory) if f.endswith(suffix))
    return [reader(os.path.join(directory, f)) for f in files]

In [7]:
# Location of the raw CSV files. Set the SPACE_APPS_CSV_DIR environment variable to run
# the notebook on another machine; when it is unset, the original local path is used.
directory_path = os.environ.get(
    'SPACE_APPS_CSV_DIR',
    r'C:\Users\nikis\OneDrive\StudyMat\NASA_Space_App_Challenge\data\raw\csv',
)
dataframes = read_csv_files(directory_path, format='csv')

In [None]:
# Aggregate months of data into a single dataframe
def merge_dataframes(dataframes, num_months=12):
    """
    Inner-join a list of dataframes on their 'Date' column into a single dataframe.

    The dataframe with the fewest rows is used as the base and keeps all of its
    columns unchanged. From every other dataframe only 'Date' and 'value' are taken;
    its 'value' column is renamed to 'value_<i>', where <i> is that dataframe's
    position in the input list, so repeated merges never produce clashing names.

    Parameters:
        dataframes (list): List of dataframes, each with 'Date' and 'value' columns
        num_months (int): Number of months to aggregate. Default is 12.
            NOTE(review): accepted for backward compatibility but not used by the
            merge; the intended semantics are not visible here - confirm.
    Returns (DataFrame): Merged dataframe containing only the dates present in
        every input dataframe. The inputs are not modified.
    Raises:
        ValueError: If dataframes is empty
    """
    if len(dataframes) == 0:
        raise ValueError('No dataframes to merge')
    # Get the total number of rows in each dataframe
    rows = [df.shape[0] for df in dataframes]
    # Choose the dataframe with the least number of rows as the base dataframe
    base_idx = int(np.argmin(rows))
    merged = dataframes[base_idx]
    for idx, df in enumerate(dataframes):
        # Skip the base by position; comparing contents against the evolving merged
        # frame would stop matching after the first merge.
        if idx == base_idx:
            continue
        # The join key must travel with the values, so select both columns, and give
        # the value column a unique name before merging.
        right = df[['Date', 'value']].rename(columns={'value': 'value_{}'.format(idx)})
        merged = pd.merge(merged, right, how='inner', on='Date')
    return merged