In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the hourly ETH/USD candles; 'Date' comes in as plain strings such as '2020-03-13 08-PM'.
hourly_csv_path = './ETH_1h-datetime.csv'
df = pd.read_csv(hourly_csv_path)
df

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2020-03-13 08-PM,ETHUSD,129.94,131.82,126.87,128.71,1940673.93
1,2020-03-13 07-PM,ETHUSD,119.51,132.02,117.10,129.94,7579741.09
2,2020-03-13 06-PM,ETHUSD,124.47,124.85,115.50,119.51,4898735.81
3,2020-03-13 05-PM,ETHUSD,124.08,127.42,121.63,124.47,2753450.92
4,2020-03-13 04-PM,ETHUSD,124.85,129.51,120.17,124.08,4461424.71
...,...,...,...,...,...,...,...
23669,2017-07-01 03-PM,ETHUSD,265.74,272.74,265.00,272.57,1500282.55
23670,2017-07-01 02-PM,ETHUSD,268.79,269.90,265.00,265.74,1702536.85
23671,2017-07-01 01-PM,ETHUSD,274.83,274.93,265.00,268.79,3010787.99
23672,2017-07-01 12-PM,ETHUSD,275.01,275.01,271.00,274.83,824362.87


In [3]:
# 'Date' was read as plain strings, and a Series exposes datetime methods only through
# the `.dt` accessor. Parse with the hourly format first, then ask for the day names.
pd.to_datetime(df['Date'], format='%Y-%m-%d %I-%p').dt.day_name()

In [None]:
# Load the hourly file with 'Date' parsed into real datetimes.
# NOTE: `pd.datetime` was deprecated in pandas 1.0 and removed in 2.0, and the
# `date_parser=` argument of `read_csv` is deprecated since 2.0 -- the likely reason the
# earlier `pd.datetime.strptime` approach did not work. Parsing the column after loading
# with an explicit format behaves the same on old and new pandas versions.
HOURLY_DATE_FORMAT = '%Y-%m-%d %I-%p'  # matches values such as '2020-03-13 08-PM'


def date_parser(x):
    """Convert hourly date text (a single string or a whole Series) into datetimes.

    Parameters:
        x: a string, or an array-like/Series of strings, in HOURLY_DATE_FORMAT.

    Returns:
        A pandas Timestamp for a single string, or a datetime64 Series/index for
        array-like input.

    Raises:
        ValueError: if a value does not match HOURLY_DATE_FORMAT.
    """
    return pd.to_datetime(x, format=HOURLY_DATE_FORMAT)


df_datetime = pd.read_csv('./ETH_1h-datetime.csv')
df_datetime['Date'] = date_parser(df_datetime['Date'])
df_datetime


In [None]:
# Rebind `df` to the two-year ETH-USD file; the cells below all work on this frame.
two_year_csv_path = './ETH-USD-2year-datetime.csv'
df = pd.read_csv(two_year_csv_path)
df

In [None]:
# Inspect the raw 'Date' column before any parsing.
raw_date_column = df['Date']
raw_date_column

In [None]:
# Replace the text dates with real datetimes (expects 'YYYY-MM-DD' values).
parsed_dates = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df['Date'] = parsed_dates
df['Date']

In [None]:
# A single parsed value has day_name() directly (no `.dt` accessor needed for a scalar).
first_date = df.loc[0, 'Date']
first_date.day_name()

In [None]:
# Column-wide version: the `.dt` accessor applies day_name() to every row at once.
day_names = df['Date'].dt.day_name()
df['DayName'] = day_names
df

In [None]:
# Earliest date present in the data.
earliest_date = df['Date'].min()
earliest_date

In [None]:
# Latest date present in the data.
latest_date = df['Date'].max()
latest_date

In [None]:
# Span covered by the data: latest date minus earliest date.
dates = df['Date']
date_delta = dates.max() - dates.min()
date_delta

In [None]:
# Keep only the rows dated strictly after the cutoff.
cutoff = pd.to_datetime('2022-12-31')
date_filter = df['Date'] > cutoff

df.loc[date_filter]

In [None]:
# Promote 'Date' to the index so rows can be selected with date strings (e.g. df.loc['2022']).
# The guard makes the cell safe to re-run: once 'Date' is already the index it is a no-op
# instead of a KeyError, and the non-inplace call avoids mutating the frame behind the
# reader's back.
if 'Date' in df.columns:
    df = df.set_index('Date')

In [None]:
# A bare year string on the date index selects rows by that year.
rows_in_2022 = df.loc['2022']
rows_in_2022

In [None]:
# Slice of the date index whose start and end are both the month string '2023-01'.
january_2023 = slice('2023-01', '2023-01')
df.loc[january_2023]

In [None]:
# Mean 'Close' over the '2023-01' to '2023-02' slice of the date index.
# Rows and column are selected in one .loc call instead of chaining [...][...].
df.loc['2023-01':'2023-02', 'Close'].mean()

In [None]:
# Highest 'High' recorded for 2023-01-01.
# Rows and column are selected in one .loc call instead of chaining [...][...].
# NOTE(review): if the index holds exactly one row per day this is simply that row's 'High'.
df.loc['2023-01-01', 'High'].max()

In [None]:
# Lowest 'Low' recorded for 2023-01-01.
# Uses min() so the 'Low' column is reduced the same way as in the resample/agg cells
# ('Low': 'min'); the earlier max() looked like a copy-paste from the 'High' cell.
# NOTE(review): with exactly one row per day min() and max() give the same value.
df.loc['2023-01-01', 'Low'].min()

In [None]:
# Highest 'High' value for each calendar day.
#
# - df['High'] selects the 'High' column of the DataFrame.
# - resample(rule='D') regroups a datetime-indexed series into new time buckets;
#   'D' means one bucket per day. Other frequency strings include 'W' for week and
#   'h' for hour (spelled 'H' before pandas 2.2).
#   Ref: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
# - max() is the aggregation applied inside every bucket: it keeps the largest value
#   that falls in each day.
high_prices = df['High']
daily_buckets = high_prices.resample(rule='D')
each_day_highs = daily_buckets.max()

each_day_highs

In [None]:
%matplotlib inline

In [None]:
# Line chart of the daily highs (a line plot is the default kind for Series.plot).
each_day_highs.plot(kind='line')

In [None]:
# Weekly summary of the price data.
#
# resample(rule='W') groups the rows into weekly bins ('W' stands for weekly), and
# agg(...) applies a different reduction to each column inside every bin:
#   - 'High'   -> 'max'  : highest value of the week
#   - 'Low'    -> 'min'  : lowest value of the week
#   - 'Close'  -> 'mean' : average value over the week
#   - 'Volume' -> 'sum'  : total over the week
weekly_aggregations = {'High': 'max', 'Low': 'min', 'Close': 'mean', 'Volume': 'sum'}
weekly_bins = df.resample(rule='W')
weekly_bins.agg(weekly_aggregations)

In [None]:
import matplotlib.pyplot as plt

# Monthly summary: max High, min Low, mean Close and total Volume per month.
# NOTE(review): pandas 2.2+ prefers the alias 'ME' for month-end; 'M' is kept here so the
# cell still runs on older pandas versions.
monthly_aggregations = {'High': 'max', 'Low': 'min', 'Close': 'mean', 'Volume': 'sum'}
resampled_data = df.resample(rule='M').agg(monthly_aggregations)

# Draw High, Low and Close on one shared axes as plain lines (marker='' draws no markers).
fig, ax = plt.subplots(figsize=(10, 6))
for column in ('High', 'Low', 'Close'):
    ax.plot(resampled_data.index, resampled_data[column], label=column, marker='')

# Labels, title, legend and grid so the figure can be read on its own.
ax.set_xlabel('Month')
ax.set_ylabel('Values')
ax.set_title('Monthly Resampled Data')
ax.legend()
ax.grid(True)
plt.show()