In [1]:
# Load Modules

import pandas as pd
import json

### Load Company List

In [2]:
# Load code company list
c2c = pd.read_csv("code_comp_list.csv")
c2c['종목코드'] = c2c['종목코드'].map("{:06d}".format)

In [3]:
c2c.head()

Unnamed: 0,종목코드,기업명
0,20,AJ네트웍스
1,40,AJ렌터카
2,50,AK홀딩스
3,60,BGF
4,70,BGF리테일


In [4]:
code2comp = dict()
comp2code = dict()
for idx, row in c2c.iterrows():
    code2comp[str(row['종목코드'])] = row['기업명']
    comp2code[row['기업명']] = row['종목코드']

In [5]:
code_list = list(code2comp.keys())
len(code_list)

788

### Combine Keyword Data and Price Data

In [8]:
# Combine Search Data and Price Data 
# For All Companies In List

for code in code_list:  
    print("=" * 40)
    print("Processing ", code)
    
    # Filepath
    price_filepath = '../price/' + code + '.csv'
    keyword_filepath = '../keyword/' + code + '.csv'
    
    # Process Price Data
    price_df = pd.read_csv(price_filepath)
    if price_df.empty:
        continue
    price_df = price_df[['날짜', '종가', '거래량']]
    price_df = price_df.rename(columns={'날짜': 'period', '종가': 'price', '거래량': 'volume'}) # Make column names consistent
    price_df = price_df.set_index('period')
    # print(price_df.head())
    
    # Process Search Data
    keyword_df = pd.read_csv(keyword_filepath)
    if keyword_df.empty:
        continue
    keyword_df = keyword_df.set_index('period')
    
    # Combine
    combined_df = pd.concat([price_df, keyword_df], axis=1)
    combined_df = combined_df.dropna()
    print(combined_df.head())
    
    # Save
    save_path = '../test/' + code + '.csv'
    combined_df.to_csv(save_path)

Processing  000020
             price    volume     ratio
period                                
2016-01-01  8140.0  166761.0   1.74287
2016-01-02  8140.0  166761.0   2.49250
2016-01-03  8140.0  166761.0   1.94902
2016-01-04  8140.0  281440.0  13.02473
2016-01-05  8190.0  243179.0  17.14767
Processing  000040
             price     volume    ratio
period                                
2016-01-01  1270.0  5544008.0  1.67232
2016-01-02  1270.0  5544008.0  1.73974
2016-01-03  1270.0  5544008.0  1.78352
2016-01-04  1305.0  6053163.0  3.08198
2016-01-05  1310.0  9674168.0  2.69849
Processing  000050
               price  volume     ratio
period                                
2016-01-01  192500.0  1201.0   4.78709
2016-01-02  192500.0  1201.0   4.20523
2016-01-03  192500.0  1201.0   5.34250
2016-01-04  180000.0  3307.0  31.42025
2016-01-05  182500.0  1004.0  32.71621
Processing  000060
              price    volume    ratio
period                                
2016-01-01  16150.0  128635

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





Processing  000370
             price    volume     ratio
period                                
2016-01-01  7800.0  250585.0   3.19664
2016-01-02  7800.0  250585.0   3.93929
2016-01-03  7800.0  250585.0   3.97158
2016-01-04  7750.0  296545.0  20.24539
2016-01-05  7880.0  200406.0  21.21407
Processing  000390
              price   volume     ratio
period                                
2016-01-01  12700.0  55566.0   3.47578
2016-01-02  12700.0  55566.0   4.04558
2016-01-03  12700.0  55566.0   4.21652
2016-01-04  12100.0  65461.0  17.15099
2016-01-05  12300.0  74484.0  18.40455
Processing  000400
             price    volume    ratio
period                               
2016-01-01  2995.0  338653.0  0.23306
2016-01-02  2995.0  338653.0  0.22314
2016-01-03  2995.0  338653.0  0.24546
2016-01-04  2985.0  491644.0  0.91490
2016-01-05  2930.0  464778.0  0.71903
Processing  000430
             price   volume    ratio
period                              
2016-01-01  4840.0  11195.0  0.13676


               price   volume     ratio
period                                 
2016-01-01  251000.0  65136.0  10.22344
2016-01-02  251000.0  65136.0  11.08618
2016-01-03  251000.0  65136.0  12.58735
2016-01-04  244000.0  60411.0  53.26546
2016-01-05  240500.0  79105.0  50.49607
Processing  001060
              price    volume     ratio
period                                 
2016-01-01  36500.0  108835.0   2.37819
2016-01-02  36500.0  108835.0   2.63921
2016-01-03  36500.0  108835.0   2.23317
2016-01-04  36350.0  109152.0  10.70185
2016-01-05  37100.0  107549.0  11.51392
Processing  001070
              price  volume    ratio
period                              
2016-01-01  64100.0  1751.0  1.45228
2016-01-02  64100.0  1751.0  1.22505
2016-01-03  64100.0  1751.0  1.63999
2016-01-04  63600.0  2134.0  8.51610
2016-01-05  65600.0  2411.0  8.89152
Processing  001080
              price  volume     ratio
period                               
2016-01-01  17950.0   595.0   3.52795
2016-01-02

Processing  001570
             price    volume     ratio
period                                
2016-01-01  1870.0  225807.0   4.16666
2016-01-02  1870.0  225807.0   4.37763
2016-01-03  1870.0  225807.0   4.45675
2016-01-04  1855.0  216672.0  20.12130
2016-01-05  1895.0  281113.0  21.05748
Processing  001620
             price    volume    ratio
period                               
2016-01-01  2570.0  308092.0  3.52473
2016-01-02  2570.0  308092.0  2.04310
2016-01-03  2570.0  308092.0  2.76164
2016-01-04  2565.0   74111.0  2.26423
2016-01-05  2545.0   82403.0  1.88435
Processing  001630
               price   volume     ratio
period                                 
2016-01-01  111000.0  63032.0   9.92028
2016-01-02  111000.0  63032.0   6.02302
2016-01-03  111000.0  63032.0   7.17449
2016-01-04  106500.0  33796.0  35.96102
2016-01-05  114500.0  54966.0  35.51815
Processing  001680
              price    volume     ratio
period                                 
2016-01-01  32600.0  1023

Processing  002360
             price    volume    ratio
period                               
2016-01-01  1440.0  632286.0  0.92105
2016-01-02  1440.0  632286.0  1.60390
2016-01-03  1440.0  632286.0  1.08482
2016-01-04  1410.0  903046.0  2.43166
2016-01-05  1410.0  856358.0  2.10711
Processing  002380
               price   volume    ratio
period                                
2016-01-01  417500.0  31047.0  1.63164
2016-01-02  417500.0  31047.0  1.92830
2016-01-03  417500.0  31047.0  2.12608
2016-01-04  406000.0  31003.0  9.34487
2016-01-05  410000.0  18859.0  7.76266
Processing  002390
              price   volume    ratio
2017-01-17  24100.0  15141.0  0.18982
2017-02-24  23450.0  18168.0  0.26575
2017-03-19  23150.0   7687.0  0.22779
2017-03-20  23150.0  11459.0  0.18982
2017-03-23  23450.0  18984.0  0.22779
Processing  002410
              price   volume    ratio
period                               
2016-01-01  13200.0  16490.0  1.64414
2016-01-02  13200.0  16490.0  1.44907
2016-

2016-01-05  53600.0  9212.0  3.03155
Processing  003060
             price     volume     ratio
period                                 
2016-01-01  3205.0  1124958.0   2.41420
2016-01-02  3205.0  1124958.0   3.53152
2016-01-03  3205.0  1124958.0   3.45171
2016-01-04  3140.0  1187386.0  16.38068
2016-01-05  3165.0  1672517.0  16.04150
Processing  003070
              price   volume    ratio
period                               
2016-01-01  14200.0  32592.0  0.12336
2016-01-02  14200.0  32592.0  0.15707
2016-01-03  14200.0  32592.0  0.14005
2016-01-04  13800.0  27494.0  0.42187
2016-01-05  13150.0  30774.0  0.42858
Processing  003080
              price  volume     ratio
period                               
2016-01-01  59000.0  1083.0   6.93430
2016-01-02  59000.0  1083.0  10.40145
2016-01-03  59000.0  1083.0   9.85401
2016-01-04  58300.0   746.0  46.35036
2016-01-05  57800.0   812.0  44.34306
Processing  003090
              price   volume     ratio
period                              

Processing  003670
              price    volume    ratio
period                                
2016-01-01  13700.0  221344.0  1.08753
2016-01-02  13700.0  221344.0  1.27766
2016-01-03  13700.0  221344.0  1.30047
2016-01-04  13650.0  202253.0  6.09171
2016-01-05  13350.0  162361.0  7.74203
Processing  003680
             price   volume     ratio
period                               
2016-01-01  8990.0  23364.0   3.75672
2016-01-02  8990.0  23364.0   3.61928
2016-01-03  8990.0  23364.0   4.42102
2016-01-04  9370.0  38045.0  12.43843
2016-01-05  9040.0  14916.0  12.10628
Processing  003690
              price    volume    ratio
period                                
2016-01-01  14150.0  177464.0  1.74672
2016-01-02  14150.0  177464.0  1.89228
2016-01-03  14150.0  177464.0  2.69286
2016-01-04  13500.0  230975.0  9.60698
2016-01-05  13700.0  181221.0  9.67976
Processing  003720
             price   volume     ratio
period                               
2016-01-01  1785.0  56607.0   4.4612

Processing  004540
             price     volume     ratio
period                                 
2016-01-01  5380.0   180177.0   2.82651
2016-01-02  5380.0   180177.0   3.06762
2016-01-03  5380.0   180177.0   3.75319
2016-01-04  5060.0   352820.0  10.97780
2016-01-05  5550.0  1604082.0  11.97129
Processing  004560
             price   volume     ratio
period                               
2016-01-01  9750.0  23444.0   3.42976
2016-01-02  9750.0  23444.0   3.75782
2016-01-03  9750.0  23444.0   3.66835
2016-01-04  9450.0  53295.0  17.68565
2016-01-05  9490.0  15974.0  17.26811
Processing  004690
               price  volume    ratio
period                               
2016-01-01  102500.0  2186.0  0.76610
2016-01-02  102500.0  2186.0  0.76610
2016-01-03  102500.0  2186.0  1.03378
2016-01-04  101000.0  4266.0  3.78438
2016-01-05  102500.0  3776.0  3.14749
Processing  004700
              price   volume     ratio
period                                
2016-01-01  41900.0  17395.0   4.7

EmptyDataError: No columns to parse from file