In [1]:
import numpy as np
import pandas as pd
import pytz
from pytz import all_timezones

# 문자열을 날짜로 변환

In [2]:
# Sample date-time strings: 'DD-MM-YYYY hh:mm' followed by an AM/PM marker
date_string = np.array([
    '03-04-2023 11:35 PM',
    '05-04-2023 11:00 PM',
    '07-04-2023 02:00 AM',
])

In [3]:
# Parse every string in a single vectorized call instead of one call per
# element. The format is day-month-year followed by a 12-hour clock time
# with an AM/PM marker.
parsed_dates = pd.to_datetime(date_string, format='%d-%m-%Y %I:%M %p')
for temp in parsed_dates:
    print(temp)

# errors='coerce' turns an unparseable string into NaT instead of raising.
# The previously used errors='ignore' is deprecated in pandas and must not be
# relied upon any more.
for date in date_string:
    temp_val = pd.to_datetime(date, format='%d-%m-%Y %I:%M %p', errors='coerce')

2023-04-03 23:35:00
2023-04-05 23:00:00
2023-04-07 02:00:00


# 시간대 데이터 처리

In [4]:
# A timezone can be attached at construction time through the tz argument.
# The result is bound to a name: a bare expression that is not the last line
# of a cell is neither displayed nor stored, so it would be silently lost.
date_with_tz = pd.Timestamp('2023-01-01 06:00:00', tz='Europe/London')

# Without tz the timestamp is naive (it carries no timezone information).
date = pd.Timestamp('2023-04-20 07:00:00')
print(date)

2023-04-20 07:00:00


In [5]:
# Localize the naive timestamp: the wall-clock time is kept unchanged and the
# Europe/London zone (with its UTC offset) is attached to it.
date_in_london = date.tz_localize(tz='Europe/London')
print(date_in_london)

2023-04-20 07:00:00+01:00


In [6]:
# Convert the tz-aware timestamp to another zone: the instant stays the same,
# only the wall-clock representation and the UTC offset change.
date_in_africa = date_in_london.tz_convert(tz='Africa/Abidjan')
print(date_in_africa)

2023-04-20 06:00:00+00:00


In [7]:
# Three consecutive month-end dates starting from 2 Feb 2023.
# The frequency is given as an offset object because the 'M' string alias is
# deprecated in recent pandas releases; pd.offsets.MonthEnd() is accepted by
# old and new versions alike and yields the same dates.
dates = pd.Series(pd.date_range('2/2/2023', periods=3, freq=pd.offsets.MonthEnd()))

# The .dt accessor applies tz_localize to every element of the Series.
temp = dates.dt.tz_localize('Africa/Abidjan')
print(temp)

0   2023-02-28 00:00:00+00:00
1   2023-03-31 00:00:00+00:00
2   2023-04-30 00:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]


In [8]:
# Preview the first ten timezone names known to pytz
all_timezones[:10]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau']

In [9]:
# A 'dateutil/' prefix on the zone name selects a dateutil timezone object
# rather than the default provider
dates.dt.tz_localize(tz='dateutil/Asia/Seoul')

0   2023-02-28 00:00:00+09:00
1   2023-03-31 00:00:00+09:00
2   2023-04-30 00:00:00+09:00
dtype: datetime64[ns, tzfile('ROK')]

In [10]:
# A pytz timezone object can be passed in place of a zone-name string
tz = pytz.timezone('Asia/Seoul')
temp01 = dates.dt.tz_localize(tz=tz)
print(temp01)

0   2023-02-28 00:00:00+09:00
1   2023-03-31 00:00:00+09:00
2   2023-04-30 00:00:00+09:00
dtype: datetime64[ns, Asia/Seoul]


# 날짜와 시간 선택하기

In [11]:
# 1000 consecutive hourly timestamps starting at midnight on 1 Jan 2023.
# The frame is built in one step from a dict, and the frequency is given as an
# offset object: the 'H' string alias is deprecated in recent pandas releases,
# while pd.offsets.Hour() is accepted by old and new versions alike.
dateframe = pd.DataFrame(
    {'date': pd.date_range('01/01/2023', periods=1000, freq=pd.offsets.Hour())}
)
print(dateframe)

                   date
0   2023-01-01 00:00:00
1   2023-01-01 01:00:00
2   2023-01-01 02:00:00
3   2023-01-01 03:00:00
4   2023-01-01 04:00:00
..                  ...
995 2023-02-11 11:00:00
996 2023-02-11 12:00:00
997 2023-02-11 13:00:00
998 2023-02-11 14:00:00
999 2023-02-11 15:00:00

[1000 rows x 1 columns]


In [12]:
# Keep the rows whose date lies in the half-open interval (01:00, 04:00]:
# strictly after the lower bound, up to and including the upper bound
dateframe.loc[
    (dateframe['date'] > '2023-01-01 01:00:00')
    & (dateframe['date'] <= '2023-01-01 04:00:00')
]

Unnamed: 0,date
2,2023-01-01 02:00:00
3,2023-01-01 03:00:00
4,2023-01-01 04:00:00


In [13]:
# Use the date values as the index; passing the Series (rather than the column
# name) keeps 'date' available as a regular column as well.
dateframe = dateframe.set_index(dateframe['date'])

# Label-based slicing on the datetime index includes both endpoints.
temp = dateframe.loc[slice('2023-01-01 01:00:00', '2023-01-01 04:00:00')]
print(temp)

                                   date
date                                   
2023-01-01 01:00:00 2023-01-01 01:00:00
2023-01-01 02:00:00 2023-01-01 02:00:00
2023-01-01 03:00:00 2023-01-01 03:00:00
2023-01-01 04:00:00 2023-01-01 04:00:00


# 날짜 데이터를 여러 특성으로 분할

In [17]:
# Five weekly dates starting from 1 Jan 2023
dataframe = pd.DataFrame({'date': pd.date_range('1/1/2023', periods=5, freq='W')})
print(dataframe)

# Break every date into its calendar/clock components, one column per part
# (column order follows the tuple below)
dataframe = dataframe.assign(**{
    part: getattr(dataframe['date'].dt, part)
    for part in ('year', 'month', 'day', 'hour', 'minute')
})

print(dataframe)

        date
0 2023-01-01
1 2023-01-08
2 2023-01-15
3 2023-01-22
4 2023-01-29
        date  year  month  day  hour  minute
0 2023-01-01  2023      1    1     0       0
1 2023-01-08  2023      1    8     0       0
2 2023-01-15  2023      1   15     0       0
3 2023-01-22  2023      1   22     0       0
4 2023-01-29  2023      1   29     0       0


# 날짜간의 차이 계산

In [20]:
# Two timestamp columns holding an arrival and a departure date per row
df = pd.DataFrame({
    'Arrived': [pd.Timestamp('01-01-2023'), pd.Timestamp('01-04-2023')],
    'Left': [pd.Timestamp('01-01-2023'), pd.Timestamp('01-06-2023')],
})
print(df)

     Arrived       Left
0 2023-01-01 2023-01-01
1 2023-01-04 2023-01-06


In [22]:
# Element-wise difference of the two timestamp columns (a timedelta Series).
# NOTE(review): arrival minus departure yields negative durations for stays
# that end after they begin - confirm this operand order is intended.
result = df['Arrived'].sub(df['Left'])
print(result)

0    0 days
1   -2 days
dtype: timedelta64[ns]


In [23]:
# Compute the duration between the two features as whole days.
# The vectorized .dt accessor extracts the day component of every timedelta
# without iterating over the values in Python.
result.dt.days

0    0
1   -2
dtype: int64

# 시차특성

In [25]:
# Five consecutive daily observations of a price
df = pd.DataFrame({
    'dates': pd.date_range('1/1/2023', periods=5, freq='D'),
    'stock_price': [1.1, 2.2, 3.3, 4.4, 5.5],
})

# Lag feature: each row receives the value of the row before it; the first
# row has no predecessor and therefore becomes NaN.
df['previous_day_stock_price'] = df['stock_price'].shift(periods=1)
print(df)

       dates  stock_price  previous_day_stock_price
0 2023-01-01          1.1                       NaN
1 2023-01-02          2.2                       1.1
2 2023-01-03          3.3                       2.2
3 2023-01-04          4.4                       3.3
4 2023-01-05          5.5                       4.4


# 이동 시간 윈도 사용

In [31]:
# Five month-end dates used as the index. The frequency is given as an offset
# object because the 'M' string alias is deprecated in recent pandas releases;
# pd.offsets.MonthEnd() is accepted by old and new versions alike.
time_index = pd.date_range('1/1/2023', periods=5, freq=pd.offsets.MonthEnd())
df = pd.DataFrame(index=time_index)
df['stockPrice'] = [1, 2, 3, 4, 5]
print(df)

# Simple moving average over a window of two rows; the first row has no full
# window and is therefore NaN.
rolling = df.rolling(window=2).mean()
# Exponentially weighted mean with smoothing factor alpha=0.5.
ewm = df.ewm(alpha=0.5).mean()
print(rolling, ewm)

            stockPrice
2023-01-31           1
2023-02-28           2
2023-03-31           3
2023-04-30           4
2023-05-31           5
            stockPrice
2023-01-31         NaN
2023-02-28         1.5
2023-03-31         2.5
2023-04-30         3.5
2023-05-31         4.5             stockPrice
2023-01-31    1.000000
2023-02-28    1.666667
2023-03-31    2.428571
2023-04-30    3.266667
2023-05-31    4.161290


# 시계열 누락된 값 처리

In [37]:
# Five month-end dates used as the index. The frequency is given as an offset
# object because the 'M' string alias is deprecated in recent pandas releases;
# pd.offsets.MonthEnd() is accepted by old and new versions alike.
time_index = pd.date_range('1/1/2023', periods=5, freq=pd.offsets.MonthEnd())
df = pd.DataFrame(index=time_index)
# Two consecutive missing observations sit between known values
df['Sales'] = [1.0, 2.0, np.nan, np.nan, 8.0]
print(df)

            Sales
2023-01-31    1.0
2023-02-28    2.0
2023-03-31    NaN
2023-04-30    NaN
2023-05-31    8.0


In [39]:
# Each strategy returns a new frame and leaves df untouched. The results are
# bound to names: a bare expression that is not the last line of a cell is
# neither displayed nor stored, so it would be computed and thrown away.
sales_linear = df.interpolate()   # interpolate the missing values (linear by default)
sales_ffill = df.ffill()   # forward fill: carry the last known value forward
sales_bfill = df.bfill()   # backward fill: pull the next known value backward
sales_quadratic = df.interpolate(method='quadratic')  # non-linear interpolation

# limit caps how many consecutive NaNs are filled; limit_direction selects
# the side of the gap from which filling starts.
print(df.interpolate(limit=1, limit_direction='forward'))
print(df.interpolate(limit=2, limit_direction='backward'))

            Sales
2023-01-31    1.0
2023-02-28    2.0
2023-03-31    4.0
2023-04-30    NaN
2023-05-31    8.0
            Sales
2023-01-31    1.0
2023-02-28    2.0
2023-03-31    4.0
2023-04-30    6.0
2023-05-31    8.0
