In [11]:
import sys
import datetime
import re
from typing import List
import csv

def load_geimin_csv(index: int, encoding: str = "utf-8") -> List:
    """Read ``geimin_{index}.csv`` from the working directory into a list of rows.

    Args:
        index: Number substituted into the file name ``geimin_{index}.csv``.
        encoding: Text encoding of the file. Given explicitly so the Japanese
            text decodes the same way on every platform instead of depending
            on the locale default; pass e.g. ``"cp932"`` for Shift_JIS files.

    Returns:
        A list with one entry per CSV record; each entry is a list of the
        record's cells as strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    file_name = f"geimin_{index}.csv"
    # newline="" hands line-ending handling to the csv module, which is needed
    # for quoted cells that contain embedded line breaks.
    with open(file_name, "r", encoding=encoding, newline="") as f:
        return list(csv.reader(f))


In [14]:
# Load every row of geimin_0.csv into memory; later cells index into this list.
geimin0 = load_geimin_csv(0)

In [16]:
# Display the row at index 1 to inspect the raw cell layout.
geimin0[1]

['517',
 'PS 20918|GB 10867|N64 2965|SS 858',
 '『サンパギータ』|PS、SCE、1998/10/15、4800円、70524本（初）|（1998/10/12〜10/18）',
 '『ZEUS\u3000CARNAGE HEART SECOND』|PS、アートディンク、32点(8/8/7/9)|『ヴァンパイアセイヴァーEXエディション』|PS、カプコン、32点(8/8/8/8)',
 '●―']

In [149]:
def parse_hw_cell(a_cell):
    """Parse a hardware-sales cell into a ``{hardware_name: count}`` dict.

    The cell is a '|'-separated list of ``"<name> <count>"`` entries, e.g.
    ``'PS 20918|GB 10867'``.

    Args:
        a_cell: Raw cell text.

    Returns:
        Dict mapping each hardware name to its count. Counts are kept as
        strings exactly as they appear in the cell. Entries containing
        '不明' ("unknown") and entries that do not consist of exactly two
        whitespace-separated tokens are skipped.
    """
    # '（2週分）' ("two weeks' worth") is an annotation, not part of the data.
    entries = a_cell.replace('（2週分）', '').split('|')
    hwdata = {}
    for entry in entries:
        if '不明' in entry:
            continue
        # split() with no argument tolerates leading/trailing/repeated and
        # full-width (U+3000) spaces, which split(' ') would turn into extra
        # tokens and cause the entry to be silently dropped.
        tokens = entry.split()
        if len(tokens) == 2:
            hw_name, hw_count = tokens
            hwdata[hw_name] = hw_count
    return hwdata

In [143]:
def parse_date(a_cell):
    """Extract the ``begin〜end`` date range from a cell.

    Two spellings are recognised:

    * ``Y/M/D〜Y/M/D`` - both dates carry a year.
    * ``Y/M/D〜M/D``   - the end date omits the year.

    Args:
        a_cell: Raw cell text containing the range somewhere inside it.

    Returns:
        ``[begin_date, end_date]`` as ``datetime.date`` objects.

    Raises:
        ValueError: If no date range is found, or the numbers found do not
            form valid calendar dates.
    """
    # Try the fully-qualified form first: the shorter pattern would also match
    # it, but would capture the end year/month as month/day.
    full = re.search('([0-9]+)/([0-9]+)/([0-9]+)〜([0-9]+)/([0-9]+)/([0-9]+)', a_cell)
    if full:
        begin_y, begin_m, begin_d, end_y, end_m, end_d = map(int, full.groups())
    else:
        short = re.search('([0-9]+)/([0-9]+)/([0-9]+)〜([0-9]+)/([0-9]+)', a_cell)
        if short is None:
            raise ValueError(f"no date range found in cell: {a_cell!r}")
        begin_y, begin_m, begin_d, end_m, end_d = map(int, short.groups())
        # With the end year omitted, an end month/day earlier than the start
        # means the range crosses New Year, so the end falls in the next year.
        if (end_m, end_d) < (begin_m, begin_d):
            end_y = begin_y + 1
        else:
            end_y = begin_y

    begin_date = datetime.date(begin_y, begin_m, begin_d)
    end_date = datetime.date(end_y, end_m, end_d)
    return [begin_date, end_date]

In [124]:
def parse_row(cells):
    """Turn one CSV row into a record of its sales period and hardware counts.

    Args:
        cells: One row as a sequence of cell strings; index 1 is passed to
            ``parse_hw_cell`` and index 2 to ``parse_date``.

    Returns:
        Dict with keys ``"begin"`` and ``"end"`` (the two values returned by
        ``parse_date``) and ``"hw"`` (the dict returned by ``parse_hw_cell``).
    """
    hardware = parse_hw_cell(cells[1])
    period = parse_date(cells[2])
    period_begin, period_end = period
    return dict(begin=period_begin, end=period_end, hw=hardware)
   

In [156]:
# Parse the row at index 60 and display the resulting record.
row_data = parse_row(geimin0[60])
row_data

{'begin': datetime.date(1999, 12, 20),
 'end': datetime.date(2000, 1, 2),
 'hw': {'PS': '110131',
  'DC': '83779',
  'N64': '62741',
  'GBC': '350915',
  'NGP': '14187',
  'WS': '68381',
  'ps': '211803'}}

In [147]:
def datalines(row_data) -> List:
    """Flatten a parsed row into one line per hardware entry.

    Args:
        row_data: Record with ``"begin"``, ``"end"`` and ``"hw"`` keys, where
            ``"hw"`` maps hardware name to count.

    Returns:
        A list of ``[begin, end, hardware_name, count]`` lists, in the
        iteration order of ``row_data["hw"]``.
    """
    return [
        [row_data["begin"], row_data["end"], name, count]
        for name, count in row_data["hw"].items()
    ]
            

In [148]:
# Flatten the parsed record into per-hardware lines.
# NOTE(review): the saved output of this cell shows 1998/12/21-1999/1/3, while
# row_data as displayed earlier is 1999/12/20-2000/1/2 - the output looks like
# it came from an earlier execution; re-run the notebook to refresh it.
datalines(row_data)

[[datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'GBC', '306276'],
 [datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'PS', '276371'],
 [datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'N64', '193044'],
 [datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'DC', '134020'],
 [datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'GB', '106468'],
 [datetime.date(1998, 12, 21), datetime.date(1999, 1, 3), 'NGP', '4272']]

In [25]:

# Scratch exploration: split the hardware cell of row 1 on '|' and each entry
# on a single space into a name/count pair. Unlike parse_hw_cell, this
# unpacking raises if an entry does not split into exactly two parts.
hwlines = geimin0[1][1].split('|')
hwdata = {}
for d in hwlines:
    (hw_name, hw_count) = d.split(' ')
    hwdata[hw_name] = hw_count

hwdata

{'PS': '20918', 'GB': '10867', 'N64': '2965', 'SS': '858'}

In [69]:
# Scratch exploration: show the cell at row 1, column 2 both printed and as a
# repr, so any non-printing characters would be visible.
ss = geimin0[1][2]
print(ss)
repr(ss)

『サンパギータ』|PS、SCE、1998/10/15、4800円、70524本（初）|（1998/10/12〜10/18）


"'『サンパギータ』|PS、SCE、1998/10/15、4800円、70524本（初）|（1998/10/12〜10/18）'"

In [142]:
# Scratch exploration: apply the 5-group pattern (end date without a year) to
# row 11, whose range spells out the end year. As the printed groups show, the
# 4th and 5th groups then hold the end year and end month, so the names
# end_m / end_d are misaligned for this row.
sx = geimin0[11][2]
print(sx)
res = re.search('([0-9]+)/([0-9]+)/([0-9]+)〜([0-9]+)/([0-9]+)', sx)
(begin_y, begin_m, begin_d, end_m, end_d) = res.groups()
print(res.groups())

『チョコボの不思議なダンジョン2』|PS、スクウェア、1998/12/23、6800円、427842本（初）|（1998/12/21〜1999/1/3）
('1998', '12', '21', '1999', '1')


In [140]:
# Display the begin-month group; note it is a string, not an int.
begin_m

'12'

In [74]:
# Scratch check: the 5-group pattern matches a bare range whose end date omits
# the year. re.match anchors at the start of the string, unlike re.search.
sz = '1998/10/12〜10/18'
ss = sz  # rebinds the global `ss` to this literal
m = re.match('([0-9]+)/([0-9]+)/([0-9]+)〜([0-9]+)/([0-9]+)', ss)
repr(m)

"<re.Match object; span=(0, 16), match='1998/10/12〜10/18'>"

In [158]:
# Scratch check: print the integers 0 through 9.
for i in range(10):
    print(i)
    

0
1
2
3
4
5
6
7
8
9
