## データフォーマットを扱う


### CSV を扱う


In [1]:
import csv

with open("tmpfiles/sample.csv", mode="r", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

['\ufeffid', '都道府県', '人口(人)', '面積(km2)']
['1', '東京都', '13900000', '2194.5']
['2', '神奈川県', '9200000', '2416.1']
['3', '千葉県', '6200000', '5157.5']
['4', '埼玉県', '7300000', '3797.75']


In [9]:
with open("tmpfiles/sample2.tsv", mode="r") as f:
    reader = csv.reader(f, delimiter="\t")
    for row in reader:
        print(row)

['id', '都道府県', '人口(人)', '面積(km2)']
['1', '東京都', '#13900000#', '#2194.5#']
['2', '神奈川県', '#9200000#', '#2416.1#']
['3', '千葉県', '#6200000#', '#5157.5#']
['4', '埼玉県', '#7300000#', '#3797.75#']


In [10]:
with open("tmpfiles/sample2.tsv", mode="r") as f:
    reader = csv.reader(f, delimiter="\t", quotechar="#")
    for row in reader:
        print(row)

['id', '都道府県', '人口(人)', '面積(km2)']
['1', '東京都', '13900000', '2194.5']
['2', '神奈川県', '9200000', '2416.1']
['3', '千葉県', '6200000', '5157.5']
['4', '埼玉県', '7300000', '3797.75']


In [14]:
# 書き込み
with open("tmpfiles/sample.csv", mode="r") as read_file:
    reader = csv.reader(read_file)
    next(reader)

    with open(
        "tmpfiles/result.tsv", newline="", mode="w", encoding="utf-8"
    ) as write_file:
        # tabで書き込む
        writer = csv.writer(write_file, delimiter="\t")
        writer.writerow(["都道府県", "人口密度(人/km2)"])

        for row in reader:
            population_density = float(row[2]) / float(row[3])
            writer.writerow([row[1], int(population_density)])

In [16]:
# 作成したファイルを読み込む
with open("tmpfiles/result.tsv", mode="r") as f:
    reader = csv.reader(f, delimiter="\t", quotechar="#")
    for row in reader:
        print(row)

['都道府県', '人口密度(人/km2)']
['東京都', '6334']
['神奈川県', '3807']
['千葉県', '1202']
['埼玉県', '1922']


In [17]:
with open("tmpfiles/sample.csv", mode="r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        print(row)

{'\ufeffid': '1', '都道府県': '東京都', '人口(人)': '13900000', '面積(km2)': '2194.5'}
{'\ufeffid': '2', '都道府県': '神奈川県', '人口(人)': '9200000', '面積(km2)': '2416.1'}
{'\ufeffid': '3', '都道府県': '千葉県', '人口(人)': '6200000', '面積(km2)': '5157.5'}
{'\ufeffid': '4', '都道府県': '埼玉県', '人口(人)': '7300000', '面積(km2)': '3797.75'}


In [18]:
data = [
    {"都道府県": "東京都", "人口密度(人/km2)": "6335"},
    {"都道府県": "神奈川県", "人口密度(人/km2)": "3807"},
    {"都道府県": "千葉県", "人口密度(人/km2)": "1202"},
    {"都道府県": "埼玉県", "人口密度(人/km2)": "1922"},
]

with open("tmpfiles/result.csv", mode="w", encoding="utf-8") as f:
    field_name = ["都道府県", "人口密度(人/km2)"]
    writer = csv.DictWriter(f, fieldnames=field_name)
    writer.writeheader()
    writer.writerows(data)

In [19]:
with open("tmpfiles/result.csv", mode="r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        print(row)

{'都道府県': '東京都', '人口密度(人/km2)': '6335'}
{'都道府県': '神奈川県', '人口密度(人/km2)': '3807'}
{'都道府県': '千葉県', '人口密度(人/km2)': '1202'}
{'都道府県': '埼玉県', '人口密度(人/km2)': '1922'}


In [20]:
# Snifferクラスを使用したデータ形式の推測によるコード
with open("tmpfiles/result.tsv", newline="") as f:
    dialect = csv.Sniffer().sniff(f.read(1024))

    f.seek(0)
    reader = csv.reader(f, dialect)
    for row in reader:
        print(row)

['都道府県', '人口密度(人/km2)']
['東京都', '6334']
['神奈川県', '3807']
['千葉県', '1202']
['埼玉県', '1922']


### JSON を扱う


In [21]:
import json

data = [
    {"id": 123, "entitles": {"url": "python.org", "hashtags": ["#python", "pythonjp"]}},
]
print(json.dumps(data, indent=2))

[
  {
    "id": 123,
    "entitles": {
      "url": "python.org",
      "hashtags": [
        "#python",
        "pythonjp"
      ]
    }
  }
]


In [23]:
# keyを元にしてソートして出力する
print(json.dumps(data, indent=4, sort_keys=True))

[
    {
        "entitles": {
            "hashtags": [
                "#python",
                "pythonjp"
            ],
            "url": "python.org"
        },
        "id": 123
    }
]


In [None]:
from decimal import Decimal

json_str = '["ham", 1, "egg", 1.0, {"a": false, "b": null}]'
print(json.loads(json_str))
# 指定した方でキャストする
print(json.loads(json_str, parse_float=Decimal))
print(json.loads(json_str, parse_int=float))

['ham', 1, 'egg', 1.0, {'a': False, 'b': None}]
['ham', 1, 'egg', Decimal('1.0'), {'a': False, 'b': None}]
['ham', 1.0, 'egg', 1.0, {'a': False, 'b': None}]


In [30]:
with open("tmpfiles/sample.json", mode="r") as f:
    json_str = json.load(f)
    print(json_str)

json_str[0]["entitles"]["hashtags"].append("#pyhack")

with open("tmpfiles/dump.json", mode="w") as f:
    json.dump(json_str, f, indent=2)

[{'entitles': {'hashtags': ['#python', 'pythonjp'], 'url': 'python.org'}, 'id': 123}]


In [31]:
with open("tmpfiles/dump.json", mode="r") as f:
    json_str = json.load(f)
    print(json_str)

[{'entitles': {'hashtags': ['#python', 'pythonjp', '#pyhack'], 'url': 'python.org'}, 'id': 123}]


curl で api | python -m json.tool とすることで、dumps できる
