In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
from pylab import figure, axes, pie, title, savefig

In [2]:
# Load the BTC/KRW trade CSV (May 2018, per the file name) into a DataFrame.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
trades_csv_path = "/Users/hamin/bithumb_bot_data/include/2018-new/2018-05-btc-krw.csv"
df = pd.read_csv(trades_csv_path)

In [3]:
# Inspect the dtype pandas inferred for each column of the freshly loaded frame.
df.dtypes

timestamp     object
quantity     float64
price          int64
fee            int64
amount         int64
side           int64
dtype: object

In [4]:
# Parse the 'timestamp' column (loaded as dtype 'object') into datetime64 values.
# The format is fixed, and errors='raise' makes any non-matching value abort
# the conversion instead of silently becoming NaT.
timestamp_format = '%Y-%m-%d %H:%M:%S'
df['timestamp'] = pd.to_datetime(
    df['timestamp'],
    format=timestamp_format,
    errors='raise',
)
df.dtypes

timestamp    datetime64[ns]
quantity            float64
price                 int64
fee                   int64
amount                int64
side                  int64
dtype: object

In [5]:
## Break the parsed timestamp into separate calendar/clock columns.
# Reuse one `.dt` accessor for every derived column.
ts_parts = df['timestamp'].dt

df['timestamp_year_month_day'] = ts_parts.date          # calendar date (datetime.date objects, not strings)
df['timestamp_year'] = ts_parts.year                    # year (4-digit integer)
df['timestamp_month'] = ts_parts.month                  # month (integer)
df['timestamp_month_name'] = ts_parts.month_name()      # month name (string, e.g. "May")
df['timestamp_day'] = ts_parts.day                      # day of month (integer)
df['timestamp_time'] = ts_parts.time                    # time of day (datetime.time objects, not strings)
df['timestamp_hour'] = ts_parts.hour                    # hour (integer)
df['timestamp_minute'] = ts_parts.minute                # minute (integer)
df['timestamp_second'] = ts_parts.second                # second (integer)

In [6]:
# Display the frame to check the derived timestamp_* columns.
df

Unnamed: 0,timestamp,quantity,price,fee,amount,side,timestamp_year_month_day,timestamp_year,timestamp_month,timestamp_month_name,timestamp_day,timestamp_time,timestamp_hour,timestamp_minute,timestamp_second
0,2018-05-01 01:06:00,0.8064,10164000,0,-8196250,0,2018-05-01,2018,5,May,1,01:06:00,1,6,0
1,2018-05-01 01:06:00,0.1000,10163000,0,1016300,1,2018-05-01,2018,5,May,1,01:06:00,1,6,0
2,2018-05-01 01:06:00,0.1000,10163000,0,1016300,1,2018-05-01,2018,5,May,1,01:06:00,1,6,0
3,2018-05-01 01:06:00,0.3382,10162000,0,3436788,1,2018-05-01,2018,5,May,1,01:06:00,1,6,0
4,2018-05-01 01:06:00,0.1000,10153000,0,1015300,1,2018-05-01,2018,5,May,1,01:06:00,1,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23290,2018-05-31 23:19:00,0.2509,8338000,0,2092004,1,2018-05-31,2018,5,May,31,23:19:00,23,19,0
23291,2018-05-31 23:21:00,0.0390,8342000,0,325338,1,2018-05-31,2018,5,May,31,23:21:00,23,21,0
23292,2018-05-31 23:21:00,0.4258,8342000,0,3552024,1,2018-05-31,2018,5,May,31,23:21:00,23,21,0
23293,2018-05-31 23:21:00,0.1126,8342000,0,939309,1,2018-05-31,2018,5,May,31,23:21:00,23,21,0


In [7]:
## Group the 'amount' column by calendar day ('timestamp_year_month_day')
## so per-day aggregates can be computed from it.
group_day_amount = df.groupby('timestamp_year_month_day')['amount']

In [8]:
## Total profit/loss per day (sum of `amount` within each calendar date)
group_day_amount.sum()

timestamp_year_month_day
2018-05-01   -17802234
2018-05-02     8295332
2018-05-03    10138584
2018-05-04      545803
2018-05-05      340552
2018-05-06   -15021496
2018-05-07    -9802781
2018-05-08    -4478454
2018-05-09    32020288
2018-05-10   -15454910
2018-05-11    15948021
2018-05-12      716218
2018-05-13      506235
2018-05-14      748015
2018-05-15     -642716
2018-05-16   -33671859
2018-05-17    35135338
2018-05-18      827709
2018-05-19      722379
2018-05-20   -26828987
2018-05-21    28321486
2018-05-22   -23719995
2018-05-23    23599623
2018-05-24   -12506828
2018-05-25    12749313
2018-05-26   -13815553
2018-05-27     3720588
2018-05-28    -5858398
2018-05-29    16548664
2018-05-30      187538
2018-05-31      531831
Name: amount, dtype: int64

In [9]:
## Build the list of per-day Series used to assemble the daily table
## (currently a single entry: the daily sum of `amount`)
dfs = [group_day_amount.sum()]
dfs

[timestamp_year_month_day
 2018-05-01   -17802234
 2018-05-02     8295332
 2018-05-03    10138584
 2018-05-04      545803
 2018-05-05      340552
 2018-05-06   -15021496
 2018-05-07    -9802781
 2018-05-08    -4478454
 2018-05-09    32020288
 2018-05-10   -15454910
 2018-05-11    15948021
 2018-05-12      716218
 2018-05-13      506235
 2018-05-14      748015
 2018-05-15     -642716
 2018-05-16   -33671859
 2018-05-17    35135338
 2018-05-18      827709
 2018-05-19      722379
 2018-05-20   -26828987
 2018-05-21    28321486
 2018-05-22   -23719995
 2018-05-23    23599623
 2018-05-24   -12506828
 2018-05-25    12749313
 2018-05-26   -13815553
 2018-05-27     3720588
 2018-05-28    -5858398
 2018-05-29    16548664
 2018-05-30      187538
 2018-05-31      531831
 Name: amount, dtype: int64]

In [10]:
# Combine every per-day Series in `dfs` into one table keyed by calendar day.
day_df_final = reduce(lambda left, right: pd.merge(left, right, on='timestamp_year_month_day'), dfs)

# Label the result "daily profit".
# With a single entry in `dfs`, reduce() returns that Series unchanged, and a
# Series has no `columns` axis: assigning `.columns` would only attach a stray
# attribute and leave the name as 'amount'. Rename the Series instead
# (rename returns a new Series, so the entry in `dfs` is not modified).
if isinstance(day_df_final, pd.Series):
    day_df_final = day_df_final.rename("daily profit")
else:
    day_df_final.columns = ["daily profit"]
day_df_final

timestamp_year_month_day
2018-05-01   -17802234
2018-05-02     8295332
2018-05-03    10138584
2018-05-04      545803
2018-05-05      340552
2018-05-06   -15021496
2018-05-07    -9802781
2018-05-08    -4478454
2018-05-09    32020288
2018-05-10   -15454910
2018-05-11    15948021
2018-05-12      716218
2018-05-13      506235
2018-05-14      748015
2018-05-15     -642716
2018-05-16   -33671859
2018-05-17    35135338
2018-05-18      827709
2018-05-19      722379
2018-05-20   -26828987
2018-05-21    28321486
2018-05-22   -23719995
2018-05-23    23599623
2018-05-24   -12506828
2018-05-25    12749313
2018-05-26   -13815553
2018-05-27     3720588
2018-05-28    -5858398
2018-05-29    16548664
2018-05-30      187538
2018-05-31      531831
Name: amount, dtype: int64

In [11]:
# Preview the daily table on a daily ('D') frequency. The result is only
# printed, not assigned, so day_df_final itself is left unchanged.
print(day_df_final.asfreq('D'))

timestamp_year_month_day
2018-05-01   -17802234
2018-05-02     8295332
2018-05-03    10138584
2018-05-04      545803
2018-05-05      340552
2018-05-06   -15021496
2018-05-07    -9802781
2018-05-08    -4478454
2018-05-09    32020288
2018-05-10   -15454910
2018-05-11    15948021
2018-05-12      716218
2018-05-13      506235
2018-05-14      748015
2018-05-15     -642716
2018-05-16   -33671859
2018-05-17    35135338
2018-05-18      827709
2018-05-19      722379
2018-05-20   -26828987
2018-05-21    28321486
2018-05-22   -23719995
2018-05-23    23599623
2018-05-24   -12506828
2018-05-25    12749313
2018-05-26   -13815553
2018-05-27     3720588
2018-05-28    -5858398
2018-05-29    16548664
2018-05-30      187538
2018-05-31      531831
Freq: D, Name: amount, dtype: int64


In [12]:
# Replace the index with a DatetimeIndex built from the existing index values
# (the group keys were produced with `.dt.date`).
# Note: this mutates day_df_final in place.
day_df_final.index = pd.DatetimeIndex(day_df_final.index)

In [13]:
# Build a DatetimeIndex with one entry per day of month `i` in `year`.
year = 2018
i = 1
month_start = "{}-{}-1".format(year, i)
# Number of days in that month sets how many daily periods are generated.
idx = pd.date_range(
    start=month_start,
    periods=pd.Period(month_start).days_in_month,
)
print(idx)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')


In [14]:
# Write the daily table to CSV, including the header row and the date index.
# NOTE(review): absolute, machine-specific path -- consider a configurable output directory.
daily_profit_csv_path = "/Users/hamin/bithumb_bot_data/endingPrice/dailyProfit/dailyProfit05.csv"
day_df_final.to_csv(
    daily_profit_csv_path,
    header=True,
    index=True,
    encoding='utf-8',
)