## Time Series Analysis

In [4]:
import pandas as pd
import numpy as np

dates = pd.date_range(start='2023-01-01', periods=100, freq='D') 
data = { 
'Date': dates, 
'Temperature': np.random.randint(10, 35, size=100), 
'Stock_Price': np.random.uniform(100, 200, size=100).cumsum() 
} 
df_time = pd.DataFrame(data) 
df_time.set_index('Date', inplace=True)
df_time

Unnamed: 0_level_0,Temperature,Stock_Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,16,188.492108
2023-01-02,24,335.261443
2023-01-03,11,520.769340
2023-01-04,23,635.218228
2023-01-05,27,754.169332
...,...,...
2023-04-06,19,14306.209895
2023-04-07,22,14493.562805
2023-04-08,18,14620.399016
2023-04-09,26,14778.921956


In [10]:
# 25. Select all data from df_time for the month of February 2023. 
df = df_time['2023-02-01':'2023-02-28']
df

Unnamed: 0_level_0,Temperature,Stock_Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-02-01,16,4733.388096
2023-02-02,21,4882.185607
2023-02-03,34,4988.830237
2023-02-04,34,5104.1768
2023-02-05,17,5288.301613
2023-02-06,31,5422.051492
2023-02-07,33,5578.120698
2023-02-08,17,5732.036766
2023-02-09,27,5877.659972
2023-02-10,18,5996.978754


In [14]:
# 26. Resample the daily Stock_Price data to get the monthly average stock price. 
df_time['Stock_Price'].resample('ME').mean()

Date
2023-01-31     2469.710868
2023-02-28     6747.567090
2023-03-31    11247.397555
2023-04-30    14237.071900
Freq: ME, Name: Stock_Price, dtype: float64

In [17]:
# 27. Calculate the 7-day rolling average of the 'Temperature' column. 
df_time['Temperature'].rolling(7).mean()

Date
2023-01-01          NaN
2023-01-02          NaN
2023-01-03          NaN
2023-01-04          NaN
2023-01-05          NaN
                ...    
2023-04-06    17.571429
2023-04-07    19.000000
2023-04-08    18.285714
2023-04-09    19.857143
2023-04-10    22.428571
Name: Temperature, Length: 100, dtype: float64

In [18]:
# 28. Calculate the percentage change in 'Stock_Price' from the previous day. 
df_time['Stock_Price'].pct_change()

Date
2023-01-01         NaN
2023-01-02    0.778650
2023-01-03    0.553323
2023-01-04    0.219769
2023-01-05    0.187260
                ...   
2023-04-06    0.010318
2023-04-07    0.013096
2023-04-08    0.008751
2023-04-09    0.010843
2023-04-10    0.007828
Name: Stock_Price, Length: 100, dtype: float64

In [19]:
# 29. Find the date with the maximum temperature. 
df_time['Temperature'].idxmax()

Timestamp('2023-01-18 00:00:00')

In [20]:
# 30. Create a new column 'Day_of_Week' in df_time containing the name of the day (e.g., Monday). 
df_time['Day_of_Week'] = df_time.index.day_name()
df_time[['Temperature','Stock_Price','Day_of_Week']].head()

Unnamed: 0_level_0,Temperature,Stock_Price,Day_of_Week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,16,188.492108,Sunday
2023-01-02,24,335.261443,Monday
2023-01-03,11,520.76934,Tuesday
2023-01-04,23,635.218228,Wednesday
2023-01-05,27,754.169332,Thursday


In [21]:
# 31. Calculate the cumulative sum of 'Temperature' over time. 
df_time['Temperature'].cumsum()

Date
2023-01-01      16
2023-01-02      40
2023-01-03      51
2023-01-04      74
2023-01-05     101
              ... 
2023-04-06    2134
2023-04-07    2156
2023-04-08    2174
2023-04-09    2200
2023-04-10    2228
Name: Temperature, Length: 100, dtype: int32

In [22]:
# 32. Select data for all weekends (Saturday and Sunday) from df_time. 
df_time[df_time['Day_of_Week'].isin(['Saturday','Sunday'])]

Unnamed: 0_level_0,Temperature,Stock_Price,Day_of_Week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,16,188.492108,Sunday
2023-01-07,10,1041.667864,Saturday
2023-01-08,21,1221.374642,Sunday
2023-01-14,17,2203.876392,Saturday
2023-01-15,27,2364.336413,Sunday
2023-01-21,12,3285.686173,Saturday
2023-01-22,29,3396.506241,Sunday
2023-01-28,24,4199.4293,Saturday
2023-01-29,31,4374.142937,Sunday
2023-02-04,34,5104.1768,Saturday


In [23]:
# 33. Downsample the data to a weekly frequency, taking the maximum value for 'Temperature' and the last value for 'Stock_Price'.
df_time.resample('W').agg({'Temperature':'max','Stock_Price':'last'})

Unnamed: 0_level_0,Temperature,Stock_Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,16,188.492108
2023-01-08,27,1221.374642
2023-01-15,28,2364.336413
2023-01-22,34,3396.506241
2023-01-29,33,4374.142937
2023-02-05,34,5288.301613
2023-02-12,33,6296.100135
2023-02-19,33,7497.522998
2023-02-26,31,8568.450985
2023-03-05,31,9537.475609


In [24]:
# 34. Calculate the 30-day rolling standard deviation of 'Stock_Price'. 
df_time['Stock_Price'].rolling(30).std()

Date
2023-01-01            NaN
2023-01-02            NaN
2023-01-03            NaN
2023-01-04            NaN
2023-01-05            NaN
                 ...     
2023-04-06    1284.859637
2023-04-07    1278.052702
2023-04-08    1271.385394
2023-04-09    1268.349020
2023-04-10    1262.683516
Name: Stock_Price, Length: 100, dtype: float64

In [25]:
# 35. Create a new DataFrame by shifting the 'Stock_Price' column forward by one day. 
df_shifted = df_time[['Stock_Price']].shift(1)
df_shifted.head()

Unnamed: 0_level_0,Stock_Price
Date,Unnamed: 1_level_1
2023-01-01,
2023-01-02,188.492108
2023-01-03,335.261443
2023-01-04,520.76934
2023-01-05,635.218228


In [27]:
# 36. Fill any missing values in the resampled monthly data (from question 26) using forward fill (ffill). 
monthly_avg = df_time['Stock_Price'].resample('ME').mean()
monthly_avg.ffill()

Date
2023-01-31     2469.710868
2023-02-28     6747.567090
2023-03-31    11247.397555
2023-04-30    14237.071900
Freq: ME, Name: Stock_Price, dtype: float64