In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
from pylab import figure, axes, pie, title, savefig

In [2]:
df = pd.read_csv("/Users/hamin/ITE3035_Python/Data_Science/endingPrice/profit_point/bithumb-2018-04-btc-profit_point_graph_table.csv")

In [3]:
df.dtypes

Unnamed: 0               int64
timestamp               object
point_amount_cumsum    float64
dtype: object

In [4]:
# converting timestamp column type 'object' to 'datetime'
df['timestamp'] = pd.to_datetime(df['timestamp'], format = '%Y-%m-%d %H:%M:%S', errors='raise')
df.dtypes

Unnamed: 0                      int64
timestamp              datetime64[ns]
point_amount_cumsum           float64
dtype: object

In [5]:
## converting timestamp to detailed date column
df['timestamp_year_month_day']       = df['timestamp'].dt.date         # YYYY-MM-DD(문자)
df['timestamp_year']       = df['timestamp'].dt.year         # 연(4자리숫자)
df['timestamp_month']      = df['timestamp'].dt.month        # 월(숫자)
df['timestamp_month_name'] = df['timestamp'].dt.month_name() # 월(문자)
df['timestamp_day']        = df['timestamp'].dt.day          # 일(숫자)
df['timestamp_time']       = df['timestamp'].dt.time         # HH:MM:SS(문자)
df['timestamp_hour']       = df['timestamp'].dt.hour         # 시(숫자)
df['timestamp_minute']     = df['timestamp'].dt.minute       # 분(숫자)
df['timestamp_second']     = df['timestamp'].dt.second       # 초(숫자)

In [6]:
df

Unnamed: 0.1,Unnamed: 0,timestamp,point_amount_cumsum,timestamp_year_month_day,timestamp_year,timestamp_month,timestamp_month_name,timestamp_day,timestamp_time,timestamp_hour,timestamp_minute,timestamp_second
0,23,2018-04-01 00:24:41,-38857.0,2018-04-01,2018,4,April,1,00:24:41,0,24,41
1,44,2018-04-01 00:46:09,147382.0,2018-04-01,2018,4,April,1,00:46:09,0,46,9
2,53,2018-04-01 00:59:44,16366.0,2018-04-01,2018,4,April,1,00:59:44,0,59,44
3,54,2018-04-01 00:59:51,-35682.0,2018-04-01,2018,4,April,1,00:59:51,0,59,51
4,156,2018-04-01 07:53:19,-5367.0,2018-04-01,2018,4,April,1,07:53:19,7,53,19
...,...,...,...,...,...,...,...,...,...,...,...,...
261,27168,2018-04-30 00:52:05,-66841.0,2018-04-30,2018,4,April,30,00:52:05,0,52,5
262,27367,2018-04-30 12:19:34,-184171.0,2018-04-30,2018,4,April,30,12:19:34,12,19,34
263,27542,2018-04-30 16:43:32,28835.0,2018-04-30,2018,4,April,30,16:43:32,16,43,32
264,27543,2018-04-30 16:43:42,-26159.0,2018-04-30,2018,4,April,30,16:43:42,16,43,42


In [7]:
## creating 'day - amount' connection
group_day_amount = df['point_amount_cumsum'].groupby(df['timestamp_year_month_day'])

In [8]:
## 일별 총 손익량
group_day_amount.sum()

timestamp_year_month_day
2018-04-01      83842.0
2018-04-02    1295437.0
2018-04-03    1246040.0
2018-04-04     914424.0
2018-04-05    1406985.0
2018-04-06     701784.0
2018-04-07     876096.0
2018-04-08     370738.0
2018-04-09    -106319.0
2018-04-10     424393.0
2018-04-11     194524.0
2018-04-12    1282502.0
2018-04-13     741251.0
2018-04-14     488367.0
2018-04-15    1127753.0
2018-04-16     866373.0
2018-04-17     743528.0
2018-04-18     387831.0
2018-04-19     161988.0
2018-04-20    2625019.0
2018-04-21    1977607.0
2018-04-22     397215.0
2018-04-23     704834.0
2018-04-24     820814.0
2018-04-25    2579290.0
2018-04-26     965159.0
2018-04-27     402395.0
2018-04-28     152471.0
2018-04-29     473279.0
2018-04-30    -338172.0
Name: point_amount_cumsum, dtype: float64

In [9]:
## 일별 계산 table 생성
dfs = [group_day_amount.sum()]
dfs

[timestamp_year_month_day
 2018-04-01      83842.0
 2018-04-02    1295437.0
 2018-04-03    1246040.0
 2018-04-04     914424.0
 2018-04-05    1406985.0
 2018-04-06     701784.0
 2018-04-07     876096.0
 2018-04-08     370738.0
 2018-04-09    -106319.0
 2018-04-10     424393.0
 2018-04-11     194524.0
 2018-04-12    1282502.0
 2018-04-13     741251.0
 2018-04-14     488367.0
 2018-04-15    1127753.0
 2018-04-16     866373.0
 2018-04-17     743528.0
 2018-04-18     387831.0
 2018-04-19     161988.0
 2018-04-20    2625019.0
 2018-04-21    1977607.0
 2018-04-22     397215.0
 2018-04-23     704834.0
 2018-04-24     820814.0
 2018-04-25    2579290.0
 2018-04-26     965159.0
 2018-04-27     402395.0
 2018-04-28     152471.0
 2018-04-29     473279.0
 2018-04-30    -338172.0
 Name: point_amount_cumsum, dtype: float64]

In [10]:
day_df_final = reduce(lambda left,right: pd.merge(left,right,on='timestamp_year_month_day'), dfs)
day_df_final.columns = [ "daily profit"]
day_df_final

timestamp_year_month_day
2018-04-01      83842.0
2018-04-02    1295437.0
2018-04-03    1246040.0
2018-04-04     914424.0
2018-04-05    1406985.0
2018-04-06     701784.0
2018-04-07     876096.0
2018-04-08     370738.0
2018-04-09    -106319.0
2018-04-10     424393.0
2018-04-11     194524.0
2018-04-12    1282502.0
2018-04-13     741251.0
2018-04-14     488367.0
2018-04-15    1127753.0
2018-04-16     866373.0
2018-04-17     743528.0
2018-04-18     387831.0
2018-04-19     161988.0
2018-04-20    2625019.0
2018-04-21    1977607.0
2018-04-22     397215.0
2018-04-23     704834.0
2018-04-24     820814.0
2018-04-25    2579290.0
2018-04-26     965159.0
2018-04-27     402395.0
2018-04-28     152471.0
2018-04-29     473279.0
2018-04-30    -338172.0
Name: point_amount_cumsum, dtype: float64

In [11]:
print(day_df_final.asfreq('D'))

timestamp_year_month_day
2018-04-01      83842.0
2018-04-02    1295437.0
2018-04-03    1246040.0
2018-04-04     914424.0
2018-04-05    1406985.0
2018-04-06     701784.0
2018-04-07     876096.0
2018-04-08     370738.0
2018-04-09    -106319.0
2018-04-10     424393.0
2018-04-11     194524.0
2018-04-12    1282502.0
2018-04-13     741251.0
2018-04-14     488367.0
2018-04-15    1127753.0
2018-04-16     866373.0
2018-04-17     743528.0
2018-04-18     387831.0
2018-04-19     161988.0
2018-04-20    2625019.0
2018-04-21    1977607.0
2018-04-22     397215.0
2018-04-23     704834.0
2018-04-24     820814.0
2018-04-25    2579290.0
2018-04-26     965159.0
2018-04-27     402395.0
2018-04-28     152471.0
2018-04-29     473279.0
2018-04-30    -338172.0
Freq: D, Name: point_amount_cumsum, dtype: float64


In [12]:
day_df_final.index = pd.DatetimeIndex(day_df_final.index)

In [13]:
year=2018
i = 1
idx = pd.date_range(start = "{}-{}-1".format(year, i), periods = pd.Period("{}-{}-1".format(year, i)).days_in_month)
print(idx)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')


In [14]:
day_df_final.to_csv("/Users/hamin/ITE3035_Python/Data_Science/endingPrice/dailyProfit/dailyProfit04(zero_to_zero).csv", header=True, index=True, encoding = 'utf-8')