In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# later cells can read files stored under that path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import pandas as pd
import numpy as np
from datetime import datetime


In [None]:
import os
import pandas as pd

def merge_csv_files(root_folder):
    """Read every ``.csv`` file under ``root_folder`` and stack them into one DataFrame.

    The folder tree is walked recursively. Files are read in sorted path
    order so the row order of the result is the same on every run and every
    filesystem (``os.walk`` itself yields entries in an arbitrary order).

    Parameters
    ----------
    root_folder : str or path-like
        Top-level directory to search for CSV files.

    Returns
    -------
    pandas.DataFrame or None
        The row-wise concatenation of all CSV files with a fresh 0..n-1
        index, or ``None`` (after printing a message) when no CSV file
        was found.
    """
    csv_paths = sorted(
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(root_folder)
        for filename in filenames
        if filename.endswith(".csv")
    )

    if not csv_paths:
        print("No CSV files found in the specified folders.")
        return None

    # Read everything first and concatenate once; growing a frame inside the
    # loop would copy the accumulated data on every iteration.
    frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
    return pd.concat(frames, ignore_index=True)

# Folder on the mounted Drive whose tree holds the CSV files to combine
root_folder = "/content/drive/MyDrive/Energy time/"
# Walk that tree and stack every CSV found into a single DataFrame
merged_data = merge_csv_files(root_folder=root_folder)

In [None]:
# Refer to the merged data under the shorter name `ts`, then preview its first three rows.
ts = merged_data
ts.head(n=3)

In [6]:
import pandas as pd
import numpy as np
from datetime import datetime

# Create a time series dataset: one standard-normal draw per calendar day of 2022.
# The draws come from a seeded Generator so that "Restart & Run All" reproduces
# exactly the same values (and therefore the same outputs) on every run.
SEED = 42
rng = np.random.default_rng(SEED)
date_range = pd.date_range(start='2022-01-01', end='2022-12-31')
data = rng.standard_normal(len(date_range))
ts = pd.Series(data, index=date_range)

In [7]:
# Preview the first five observations of the series.
ts.head(n=5)

2022-01-01   -1.379896
2022-01-02    0.046909
2022-01-03   -0.354112
2022-01-04   -1.010931
2022-01-05   -0.622216
Freq: D, dtype: float64

In [8]:
# Dimensions of ts, returned as a tuple.
ts.shape

(365,)

In [9]:
# Resampling: convert daily data to monthly data by averaging each calendar month.
# An explicit MonthEnd offset is used instead of the 'M' string alias: newer pandas
# releases deprecate 'M' in favour of 'ME', while older releases do not know 'ME'.
# The offset object means the same thing on both.
monthly_ts = ts.resample(pd.offsets.MonthEnd()).mean()
print("\nResampled Monthly Time Series:")
print(monthly_ts)


Resampled Monthly Time Series:
2022-01-31   -0.061706
2022-02-28   -0.247124
2022-03-31   -0.387892
2022-04-30   -0.006515
2022-05-31   -0.087959
2022-06-30    0.013586
2022-07-31    0.216321
2022-08-31    0.083102
2022-09-30   -0.032644
2022-10-31   -0.108859
2022-11-30    0.038790
2022-12-31    0.019070
Freq: M, dtype: float64


In [10]:
# Shifting: move every value two periods later along the index;
# the first two positions have no predecessor and become NaN.
shifted_ts = ts.shift(2)
print("\nShifted Time Series:", shifted_ts, sep="\n")


Shifted Time Series:
2022-01-01         NaN
2022-01-02         NaN
2022-01-03   -1.379896
2022-01-04    0.046909
2022-01-05   -0.354112
                ...   
2022-12-27   -0.348218
2022-12-28    0.723954
2022-12-29   -0.665920
2022-12-30   -0.365966
2022-12-31   -0.692098
Freq: D, Length: 365, dtype: float64


In [11]:
# Rolling window: mean over each run of three consecutive observations;
# positions with fewer than three values available are NaN.
rolling_mean = ts.rolling(3).mean()
print("\nRolling Mean Time Series:", rolling_mean, sep="\n")



Rolling Mean Time Series:
2022-01-01         NaN
2022-01-02         NaN
2022-01-03   -0.562366
2022-01-04   -0.439378
2022-01-05   -0.662420
                ...   
2022-12-27   -0.096728
2022-12-28   -0.102644
2022-12-29   -0.574661
2022-12-30    0.129751
2022-12-31    0.716432
Freq: D, Length: 365, dtype: float64


In [12]:
# Date range generation: five consecutive timestamps starting on 2022-01-01
# (the frequency defaults to daily when only start and periods are given).
date_range_2 = pd.date_range('2022-01-01', periods=5)
print("\nDate Range Generation:", date_range_2, sep="\n")


Date Range Generation:
DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05'],
              dtype='datetime64[ns]', freq='D')


In [13]:
# Date range generation: a five-day DatetimeIndex beginning 2022-01-01.
# NOTE(review): this cell repeats the preceding cell verbatim and rebinds the
# same name to an equal index — consider removing one of the two.
date_range_2 = pd.date_range(periods=5, start='2022-01-01')
print("\nDate Range Generation:", date_range_2, sep="\n")


Date Range Generation:
DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05'],
              dtype='datetime64[ns]', freq='D')


In [14]:
# Show the class of the object that pd.date_range returned.
type(date_range_2)

pandas.core.indexes.datetimes.DatetimeIndex

In [15]:
# Date range indexing: label-based slice of ts between the 2nd and 4th
# timestamps of date_range_2; both endpoints are included.
indexed_ts = ts.loc[date_range_2[1]:date_range_2[3]]
print("\nDate Range Indexing:", indexed_ts, sep="\n")


Date Range Indexing:
2022-01-02    0.046909
2022-01-03   -0.354112
2022-01-04   -1.010931
Freq: D, dtype: float64


In [16]:
# Handling Missing Values
# Work on a copy so the original series stays intact for the other cells.
ts_with_missing = ts.copy()
# Blank out the third observation by position. `.iloc` makes the positional
# intent explicit; a bare integer key on a datetime-indexed Series is
# deprecated because it is ambiguous between label and position.
ts_with_missing.iloc[2] = np.nan
print("\nTime Series with Missing Values:")
print(ts_with_missing)
print("\nTime Series after Filling Missing Values:")
# Forward-fill: each NaN takes the last valid value before it.
# `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
print(ts_with_missing.ffill())


Time Series with Missing Values:
2022-01-01   -1.379896
2022-01-02    0.046909
2022-01-03         NaN
2022-01-04   -1.010931
2022-01-05   -0.622216
                ...   
2022-12-27   -0.665920
2022-12-28   -0.365966
2022-12-29   -0.692098
2022-12-30    1.447317
2022-12-31    1.394077
Freq: D, Length: 365, dtype: float64

Time Series after Filling Missing Values:
2022-01-01   -1.379896
2022-01-02    0.046909
2022-01-03    0.046909
2022-01-04   -1.010931
2022-01-05   -0.622216
                ...   
2022-12-27   -0.665920
2022-12-28   -0.365966
2022-12-29   -0.692098
2022-12-30    1.447317
2022-12-31    1.394077
Freq: D, Length: 365, dtype: float64


In [17]:
# Date range frequency: every second day from 2022-01-01 through 2022-12-31.
date_range_3 = pd.date_range('2022-01-01', '2022-12-31', freq='2D')
print("\nDate Range with Frequency of 2 Days:", date_range_3, sep="\n")


Date Range with Frequency of 2 Days:
DatetimeIndex(['2022-01-01', '2022-01-03', '2022-01-05', '2022-01-07',
               '2022-01-09', '2022-01-11', '2022-01-13', '2022-01-15',
               '2022-01-17', '2022-01-19',
               ...
               '2022-12-13', '2022-12-15', '2022-12-17', '2022-12-19',
               '2022-12-21', '2022-12-23', '2022-12-25', '2022-12-27',
               '2022-12-29', '2022-12-31'],
              dtype='datetime64[ns]', length=183, freq='2D')


In [18]:
# Time zone handling: attach the UTC zone to the timezone-naive index.
# The wall-clock times are left unchanged; they are only labelled as UTC.
ts_utc = ts.tz_localize(tz='UTC')
print("\nTime Series with UTC Time Zone:", ts_utc, sep="\n")


Time Series with UTC Time Zone:
2022-01-01 00:00:00+00:00   -1.379896
2022-01-02 00:00:00+00:00    0.046909
2022-01-03 00:00:00+00:00   -0.354112
2022-01-04 00:00:00+00:00   -1.010931
2022-01-05 00:00:00+00:00   -0.622216
                               ...   
2022-12-27 00:00:00+00:00   -0.665920
2022-12-28 00:00:00+00:00   -0.365966
2022-12-29 00:00:00+00:00   -0.692098
2022-12-30 00:00:00+00:00    1.447317
2022-12-31 00:00:00+00:00    1.394077
Freq: D, Length: 365, dtype: float64


In [19]:
# Time zone conversion: express the UTC-aware index in US/Eastern local time.
# The values are untouched; only the index labels change.
ts_utc_shifted = ts_utc.tz_convert(tz='US/Eastern')
print("\nTime Series Shifted to US/Eastern Time Zone:", ts_utc_shifted, sep="\n")


Time Series Shifted to US/Eastern Time Zone:
2021-12-31 19:00:00-05:00   -1.379896
2022-01-01 19:00:00-05:00    0.046909
2022-01-02 19:00:00-05:00   -0.354112
2022-01-03 19:00:00-05:00   -1.010931
2022-01-04 19:00:00-05:00   -0.622216
                               ...   
2022-12-26 19:00:00-05:00   -0.665920
2022-12-27 19:00:00-05:00   -0.365966
2022-12-28 19:00:00-05:00   -0.692098
2022-12-29 19:00:00-05:00    1.447317
2022-12-30 19:00:00-05:00    1.394077
Freq: D, Length: 365, dtype: float64


In [20]:
# Timestamp to datetime conversion: interpret an integer count of
# nanoseconds since the Unix epoch as a pandas Timestamp.
timestamp = 1_645_075_200_000_000_000  # nanoseconds since the Unix epoch
dt = pd.to_datetime(timestamp, unit='ns')  # 'ns' is also the default unit for integers
print("\nTimestamp to DateTime Conversion:", dt, sep="\n")


Timestamp to DateTime Conversion:
2022-02-17 05:20:00
