In [4]:
import pandas as pd

# Read the temperature dataset from its CSV file
temperature_path = "temperatureData_clean/US_Temp_City_RegionOnly_1998_2019.csv"
df = pd.read_csv(temperature_path)

# Parse 'Date' into datetimes so min/max below compare timestamps rather than strings
df['Date'] = pd.to_datetime(df['Date'])

# For every Region: number of non-null dates, plus the earliest and latest date.
# The named aggregation is applied directly to the grouped 'Date' column.
region_summary = (
    df.groupby("Region")["Date"]
    .agg(Entry_Count="count", Start_Date="min", End_Date="max")
    .reset_index()
)

# Show the per-region summary table
print(region_summary)


     Region  Entry_Count Start_Date   End_Date
0     COMED          945 1998-01-01 2013-09-01
1      DEOK          189 1998-01-01 2013-09-01
2       DOM          756 1998-01-01 2013-09-01
3       DUQ          189 1998-01-01 2013-09-01
4        FE          189 1998-01-01 2013-09-01
5        NI          378 1998-01-01 2013-09-01
6      PJME          756 1998-01-01 2013-09-01
7      PJMW          945 1998-01-01 2013-09-01
8  PJM_Load         1701 1998-01-01 2013-09-01


In [10]:
import os
import pandas as pd

# Folder path
folder_path = "energyData_clean"

# File names
file_names = [
    "AEP_hourly_daily.csv",
    "COMED_hourly_daily.csv",
    "DAYTON_hourly_daily.csv",
    "DEOK_hourly_daily.csv",
    "DOM_hourly_daily.csv",
    "DUQ_hourly_daily.csv",
    "EKPC_hourly_daily.csv",
    "FE_hourly_daily.csv",
    "NI_hourly_daily.csv",
    "PJM_Load_cleaned_daily.csv",
    "PJM_Load_hourly_daily.csv",
    "PJME_hourly_daily.csv",
    "PJMW_hourly_daily.csv"
]


def summarize_date_range(csv_path):
    """Summarise the date coverage of one CSV file.

    The first column whose name contains 'date' or 'time' (case-insensitive)
    is treated as the date column. Values that cannot be parsed as dates are
    ignored.

    Parameters
    ----------
    csv_path : str, path-like or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    None
        If no column name contains 'date' or 'time'.
    tuple (int, str | None, str | None)
        ``(entry_count, start_date, end_date)`` where ``entry_count`` is the
        number of rows with a parseable date and the dates are formatted as
        ``YYYY-MM-DD``. When no row has a parseable date the result is
        ``(0, None, None)``.

    Raises
    ------
    Exception
        Whatever ``pd.read_csv`` raises (missing file, empty file, parse
        errors); the caller decides how to report it.
    """
    # Work on a local frame so the notebook-level `df` from earlier cells
    # is not silently replaced by the last file processed.
    frame = pd.read_csv(csv_path)
    date_col = next(
        (col for col in frame.columns if 'date' in col.lower() or 'time' in col.lower()),
        None,
    )
    if date_col is None:
        return None

    # errors='coerce' turns unparseable values into NaT, which dropna removes.
    dates = pd.to_datetime(frame[date_col], errors='coerce').dropna()
    if dates.empty:
        # min()/max() would be NaT here and NaT.strftime raises, so report
        # the empty case explicitly instead of as a generic error.
        return 0, None, None

    return len(dates), dates.min().strftime('%Y-%m-%d'), dates.max().strftime('%Y-%m-%d')


# Header
print(f"{'File':<35} {'Entries':>10} {'Start Date':>15} {'End Date':>15}")
print("-" * 80)

# Loop through files
for filename in file_names:
    path = os.path.join(folder_path, filename)
    try:
        summary = summarize_date_range(path)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole table, so report it on its own row and move on.
        print(f"{filename:<35} {'--':>10} {'Error':>15} {str(e)[:25]:>15}")
        continue

    if summary is None:
        print(f"{filename:<35} {'--':>10} {'No date column':>15} {'':>15}")
        continue

    entry_count, start_date, end_date = summary
    if start_date is None:
        # A date column exists but none of its values could be parsed.
        print(f"{filename:<35} {entry_count:>10} {'No valid dates':>15} {'':>15}")
    else:
        print(f"{filename:<35} {entry_count:>10} {start_date:>15} {end_date:>15}")


File                                   Entries      Start Date        End Date
--------------------------------------------------------------------------------
AEP_hourly_daily.csv                      5055      2004-10-01      2018-08-03
COMED_hourly_daily.csv                    2772      2011-01-01      2018-08-03
DAYTON_hourly_daily.csv                   5055      2004-10-01      2018-08-03
DEOK_hourly_daily.csv                     2407      2012-01-01      2018-08-03
DOM_hourly_daily.csv                      4843      2005-05-01      2018-08-03
DUQ_hourly_daily.csv                      4963      2005-01-01      2018-08-03
EKPC_hourly_daily.csv                     1890      2013-06-01      2018-08-03
FE_hourly_daily.csv                       2621      2011-06-01      2018-08-03
NI_hourly_daily.csv                       2437      2004-05-01      2011-01-01
PJM_Load_cleaned_daily.csv                1372      1998-04-01      2002-01-01
PJM_Load_hourly_daily.csv                 1372    