In [2]:
import time

import numpy as np
import pandas as pd

In [3]:
# Year -> URL of the survey CSV for that year. Every file lives under the same
# directory on opendata.metro.tokyo.lg.jp, so only the file name varies.
source_csv = {
    survey_year: (
        "https://www.opendata.metro.tokyo.lg.jp/sangyouroudou/"
        "behavioral_characteristics_survey/" + file_name
    )
    for survey_year, file_name in (
        (2014, "H26behavioral_characteristics_survey19-1.csv"),
        (2015, "H27behavioral_characteristics_survey20.csv"),
        (2016, "H28behavioral_characteristics_survey22.csv"),
        (2017, "H29behavioral_characteristics_survey21.csv"),
        (2018, "H30behavioral_characteristics_survey20.csv"),
        (2019, "H31R1behavioral_characteristics_survey20.csv"),
    )
}

In [4]:
# Japanese country/region name -> three-letter country code, listed as
# (name, code) pairs in lookup-table order.
alpha3 = dict(
    [
        ("韓国", "KOR"),
        ("台湾", "TWN"),
        ("香港", "HKG"),
        ("中国", "CHN"),
        ("タイ", "THA"),
        ("シンガポール", "SGP"),
        ("マレーシア", "MYS"),
        ("インドネシア", "IDN"),
        ("フィリピン", "PHL"),
        ("ベトナム", "VNM"),
        ("インド", "IND"),
        ("英国", "GBR"),
        ("ドイツ", "DEU"),
        ("フランス", "FRA"),
        ("イタリア", "ITA"),
        ("スペイン", "ESP"),
        ("ロシア", "RUS"),
        ("米国", "USA"),
        ("カナダ", "CAN"),
        ("オーストラリア", "AUS"),
    ]
)

### 旅行中支出額

In [4]:
def make_expenditure(year, csv_path):
    """Load one expenditure CSV and return it as a cleaned DataFrame.

    Parameters
    ----------
    year : int
        Value written to the ``年度`` column of every returned row.
    csv_path : str
        Path or URL handed to ``pandas.read_csv``; the file is decoded as
        Shift-JIS.

    Returns
    -------
    pandas.DataFrame
        The first 17 columns of the CSV with the label column renamed to
        ``国・地域`` (whitespace-stripped), the amount and sample-size columns
        cast to float, any column whose name still starts with ``Unnamed``
        removed, and two added columns: ``年度`` (``year``) and ``alpha-3``
        (the code looked up in ``alpha3``; NaN when the name has no entry).
    """
    # Fixed pause before each read -- presumably to space out requests to the
    # remote host when this is called once per year.
    time.sleep(3)
    df = (
        pd.read_csv(csv_path, encoding="sjis")
        .rename(columns={"Unnamed: 0": "国・地域", "都内支出額合計": "都内支出額計"})
        .iloc[:, :17]
    )
    df["国・地域"] = df["国・地域"].str.strip()

    numeric_cols = ["宿泊費", "飲食費", "都内交通費", "娯楽入場費", "土産買物費", "その他", "都内支出額計", "標本数"]
    for col in numeric_cols:
        try:
            # Text columns carry thousands separators ("1,234"); strip them
            # literally before converting.
            df[col] = df[col].str.replace(",", "", regex=False).astype(float)
        except AttributeError:
            # The column was already parsed as numeric, so it has no .str
            # accessor; a plain cast is enough.
            df[col] = df[col].astype(float)

    df["年度"] = year
    # Series.map with a dict yields NaN for names that are not keys of alpha3.
    df["alpha-3"] = df["国・地域"].map(alpha3)
    return df.drop(columns=df.columns[df.columns.str.startswith("Unnamed")])

In [5]:
# Build one cleaned frame per entry of source_csv (in dictionary order), stack
# them, and save the combined table without the row index.
expenditure_frames = [
    make_expenditure(survey_year, url) for survey_year, url in source_csv.items()
]
expenditure_df = pd.concat(expenditure_frames)
expenditure_df.to_csv("../data/expenditure_tokyo.csv", index=False)

### 一番満足した場所で行った活動

In [5]:
# Read the wide table, drop its last two rows, and name the label column.
activity_wide_df = pd.read_csv(
    "https://www.opendata.metro.tokyo.lg.jp/sangyouroudou/behavioral_characteristics_survey/H27behavioral_characteristics_survey16.csv",
    encoding="sjis",
)
activity_wide_df = activity_wide_df.iloc[:-2].rename(
    columns={"Unnamed: 0": "一番満足した場所で行った活動"}
)
# Remove any line breaks embedded in the column headers.
activity_wide_df.columns = activity_wide_df.columns.str.replace("\n", "")

# Every object-dtype column except the first one (the labels) is cast to float.
activity_wide_df = activity_wide_df.astype(
    {
        name: float
        for name in activity_wide_df.select_dtypes(include="object").columns[1:]
    }
)

# Wide -> long: with value_vars left unset, melt unpivots every column that is
# not the id column, i.e. all columns after the first.
activity_df = pd.melt(
    activity_wide_df,
    id_vars="一番満足した場所で行った活動",
    var_name="場所",
    value_name="%",
)

In [9]:
# Save the melted activity table as CSV; index=False leaves out the row index.
activity_df.to_csv("../data/activity_tokyo.csv", index=False)

### 東京に対するイメージ

In [11]:
# Read the wide table, name the label column, and drop its last two rows.
image_wide_df = pd.read_csv(
    "https://www.opendata.metro.tokyo.lg.jp/sangyouroudou/behavioral_characteristics_survey/H27behavioral_characteristics_survey22.csv",
    encoding="sjis",
)
image_wide_df = image_wide_df.rename(columns={"Unnamed: 0": "イメージ"}).iloc[:-2]

# Every object-dtype column except the first one (the labels) is cast to float.
image_wide_df = image_wide_df.astype(
    {
        name: float
        for name in image_wide_df.select_dtypes(include="object").columns[1:]
    }
)

# Wide -> long: with value_vars left unset, melt unpivots every column that is
# not the id column, i.e. all columns after the first.
image_df = pd.melt(
    image_wide_df,
    id_vars="イメージ",
    var_name="国・地域",
    value_name="%",
)

In [12]:
# Save the melted image table as CSV; index=False leaves out the row index.
image_df.to_csv("../data/image_tokyo.csv", index=False)

### 訪都の満足度

In [13]:
# Read the wide table and name the label column.
satisfaction_wide_df = pd.read_csv(
    "https://www.opendata.metro.tokyo.lg.jp/sangyouroudou/behavioral_characteristics_survey/H27behavioral_characteristics_survey17.csv",
    encoding="sjis",
)
satisfaction_wide_df = satisfaction_wide_df.rename(columns={"Unnamed: 0": "国・地域"})
# Remove line breaks from the headers first, so the columns dropped below are
# matched by their cleaned names.
satisfaction_wide_df.columns = satisfaction_wide_df.columns.str.replace("\n", "")
# Discard the total and sample-size columns, then every row with a missing value.
satisfaction_wide_df = satisfaction_wide_df.drop(columns=["合計", "標本数"]).dropna()

# Wide -> long: with value_vars left unset, melt unpivots every column that is
# not the id column, i.e. all columns after the first.
satisfaction_df = pd.melt(
    satisfaction_wide_df,
    id_vars="国・地域",
    var_name="満足度",
    value_name="%",
)

In [14]:
# Save the melted satisfaction table as CSV; index=False leaves out the row index.
satisfaction_df.to_csv("../data/satisfaction_tokyo.csv", index=False)