Skip to content

Commit

Permalink
Clean up py codes
Browse files Browse the repository at this point in the history
  • Loading branch information
gogotanaka committed Aug 30, 2017
1 parent 231f756 commit 2d8aa27
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 18 deletions.
7 changes: 6 additions & 1 deletion README.md
Expand Up @@ -2,4 +2,9 @@

$ docker build -t dataset:importer .
$ mkdir -p data/jp
$ docker run -i -t dataset:importer bash -l -c "python /src/jp/bank/all.py"
$ docker run -i -t dataset:importer bash -l -c "python /src/jp/bank/all.py"


# Debug
$ docker ps
$ docker exec -it cdebd3c2ec80 bash
13 changes: 7 additions & 6 deletions jp/python/bank/all.py
Expand Up @@ -2,23 +2,24 @@
import csv
import os
import requests
import zipfile

URL = 'http://ykaku.com/ginkokensaku/ginkositen.zip'
OUTPUT_DIR = '/src/data'
OUTPUT_FILE_PATH = OUTPUT_DIR + '/output.csv'
URL = 'http://ykaku.com/ginkokensaku/ginkositen.zip'
OUTPUT_FILE_PATH = '/src/data/output.csv'

# GET raw data
r = requests.get(URL)
with open(OUTPUT_DIR + '/raw.zip', 'wb') as file:
with open('/tmp/raw.zip', 'wb') as file:
file.write(r.content)

# UNZIP data
os.system('unzip ' + OUTPUT_DIR + '/raw.zip -d ' + OUTPUT_DIR)
with zipfile.ZipFile('/tmp/raw.zip', 'r') as zip_ref:
zip_ref.extractall('/tmp')

# CONVERT data into CSV
with open(OUTPUT_FILE_PATH, 'w', newline='') as output:
spamwriter = csv.writer(output, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
with codecs.open(OUTPUT_DIR + '/ginkositen.txt', 'r', 'shiftjis') as input:
with codecs.open('/tmp/ginkositen.txt', 'r', 'shiftjis') as input:
spamreader = csv.reader(input, delimiter=',', quotechar='"')
for row in spamreader:
spamwriter.writerow([x.strip() for x in row])
31 changes: 31 additions & 0 deletions jp/python/currency/jpy_usd_daily.py
@@ -0,0 +1,31 @@
import codecs
import csv
from datetime import datetime
import os
import requests
import zipfile

# '2014-12-23' => '1419292800'
def str2unixtime(s):
    """Convert a ``YYYY-MM-DD`` date string to a Unix-timestamp string.

    The date is interpreted as midnight UTC, so the result does not depend
    on the local timezone of the machine running the script, e.g.
    ``'2014-12-23'`` -> ``'1419292800'``.

    Args:
        s: Date text in ``%Y-%m-%d`` format.

    Returns:
        The whole-second Unix timestamp as a string, or ``''`` when ``s``
        is not a valid date in that format (a message is printed as well).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    try:
        parsed = datetime.strptime(s, '%Y-%m-%d')
    except ValueError:
        print('ValueError')
        return ''
    # Avoid strftime("%s"): it is a non-portable platform extension and it
    # interprets the naive datetime in local time rather than UTC.
    return str(int(parsed.replace(tzinfo=timezone.utc).timestamp()))

# Please update your quandl key
# NOTE(review): an API key committed to source control should be treated as
# leaked; consider loading it from the environment instead.
URL = 'https://www.quandl.com/api/v3/datasets/CUR/JPY.csv?api_key=mCkqGja_5orzQJxF5RhQ'
OUTPUT_FILE_PATH = '/src/data/output.csv'

# GET raw data
r = requests.get(URL)
# Fail fast on an HTTP error instead of saving the error body as if it were
# CSV data and converting it below.
r.raise_for_status()
with open('/tmp/raw.csv', 'wb') as raw_file:
    raw_file.write(r.content)

# CONVERT data into CSV along with converting date to unixtime
with open(OUTPUT_FILE_PATH, 'w', newline='') as output:
    spamwriter = csv.writer(output, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # newline='' lets the csv module handle line endings itself.
    with open('/tmp/raw.csv', 'r', newline='') as raw_csv:
        spamreader = csv.reader(raw_csv, delimiter=',', quotechar='"')
        for row in spamreader:
            # Blank or truncated lines have no date/rate pair to convert.
            if len(row) < 2:
                continue
            spamwriter.writerow([str2unixtime(row[0]), row[1]])
13 changes: 7 additions & 6 deletions jp/python/postalcode/all.py
Expand Up @@ -2,23 +2,24 @@
import csv
import os
import requests
import zipfile

URL = 'http://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip'
OUTPUT_DIR = '/tmp/build'
OUTPUT_FILE_PATH = OUTPUT_DIR + '/output.csv'
URL = 'http://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip'
OUTPUT_FILE_PATH = '/src/data/output.csv'

# GET raw data
r = requests.get(URL)
with open(OUTPUT_DIR + '/raw.zip', 'wb') as file:
with open('/tmp/raw.zip', 'wb') as file:
file.write(r.content)

# UNZIP data
os.system('unzip ' + OUTPUT_DIR + '/raw.zip -d ' + OUTPUT_DIR)
with zipfile.ZipFile('/tmp/raw.zip', 'r') as zip_ref:
zip_ref.extractall('/tmp')

# CONVERT data into CSV
with open(OUTPUT_FILE_PATH, 'w', newline='') as output:
spamwriter = csv.writer(output, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
with codecs.open(OUTPUT_DIR + '/KEN_ALL.CSV', 'r', 'shiftjis') as input:
with codecs.open('/tmp/KEN_ALL.CSV', 'r', 'shiftjis') as input:
spamreader = csv.reader(input, delimiter=',', quotechar='"')
for row in spamreader:
spamwriter.writerow([x.strip() for x in row])
10 changes: 5 additions & 5 deletions jp/python/stock/ni255_daily.py
Expand Up @@ -2,17 +2,17 @@
import csv
import os
import requests
import zipfile

URL = 'http://k-db.com/indices/I101?download=csv'
OUTPUT_DIR = '/tmp/build'
OUTPUT_FILE_PATH = OUTPUT_DIR + '/output.csv'
URL = 'http://k-db.com/indices/I101?download=csv'
OUTPUT_FILE_PATH = '/src/data/output.csv'

# GET raw data
r = requests.get(URL)
with open(OUTPUT_DIR + '/raw.csv', 'wb') as file:
with open('/tmp/raw.csv', 'wb') as file:
file.write(r.content)

# CONVERT data into utf8 CSV
with open(OUTPUT_FILE_PATH, 'w', newline='') as output:
with codecs.open(OUTPUT_DIR + '/raw.csv', 'r', 'shiftjis') as input:
with codecs.open('/tmp/raw.csv', 'r', 'shiftjis') as input:
output.write(input.read())

0 comments on commit 2d8aa27

Please sign in to comment.