# Json切り分け1

## データの取得・確認

In [1]:
import pandas as pd
import numpy as np


# Load the train and test splits from the shared input directory.
train = pd.read_csv('../data/input/train.csv')
test = pd.read_csv('../data/input/test.csv')

# Report the (rows, columns) shape of each split.
train_rows, train_cols = train.shape
test_rows, test_cols = test.shape
print('Train samples: {}, features: {}'.format(train_rows, train_cols))
print('Test samples: {}, features: {}'.format(test_rows, test_cols))

Train samples: 3000, features: 23
Test samples: 4398, features: 22


In [2]:
# Preview the first three training rows to inspect the raw columns.
train.head(3)

Unnamed: 0,id,belongs_to_collection,budget,genres,homepage,imdb_id,original_language,original_title,overview,popularity,...,release_date,runtime,spoken_languages,status,tagline,title,Keywords,cast,crew,revenue
0,1,"[{'id': 313576, 'name': 'Hot Tub Time Machine ...",14000000,"[{'id': 35, 'name': 'Comedy'}]",,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,...,2/20/15,93.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Laws of Space and Time are About to be Vio...,Hot Tub Time Machine 2,"[{'id': 4379, 'name': 'time travel'}, {'id': 9...","[{'cast_id': 4, 'character': 'Lou', 'credit_id...","[{'credit_id': '59ac067c92514107af02c8c8', 'de...",12314651
1,2,"[{'id': 107674, 'name': 'The Princess Diaries ...",40000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,...,8/6/04,113.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,It can take a lifetime to find true love; she'...,The Princess Diaries 2: Royal Engagement,"[{'id': 2505, 'name': 'coronation'}, {'id': 42...","[{'cast_id': 1, 'character': 'Mia Thermopolis'...","[{'credit_id': '52fe43fe9251416c7502563d', 'de...",95149435
2,3,,3300000,"[{'id': 18, 'name': 'Drama'}]",http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,...,10/10/14,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The road to greatness can take you to the edge.,Whiplash,"[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...","[{'cast_id': 5, 'character': 'Andrew Neimann',...","[{'credit_id': '54d5356ec3a3683ba0000039', 'de...",13092000


## belongs_to_collectionをJson切り分け

In [3]:
import json
import ast

def _split_collection(frame):
    """Split the stringified ``belongs_to_collection`` column into four Series.

    Each non-null cell is parsed with ``ast.literal_eval`` and is expected to
    be a one-element list holding a dict with the keys ``id``, ``name``,
    ``poster_path`` and ``backdrop_path``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing a ``belongs_to_collection`` column.

    Returns
    -------
    tuple of pandas.Series
        ``(id, name, poster_path, backdrop_path)`` Series named
        ``belongs_to_collection_<field>`` and indexed by the row labels of the
        non-null cells that were processed.
    """
    fields = ('id', 'name', 'poster_path', 'backdrop_path')
    extracted = {field: {} for field in fields}

    # dropna() skips rows without a collection; items() yields (row label, raw text).
    for row, raw in frame['belongs_to_collection'].dropna().items():
        entries = ast.literal_eval(raw)

        # Stop as soon as a cell does not hold exactly one collection.
        # Checking ``!= 1`` (rather than ``>= 2``) also catches an empty list,
        # which would otherwise fail on ``entries[0]`` with an IndexError.
        # NOTE: rows after the offending one are left unprocessed.
        if len(entries) != 1:
            print('Stop ! key: {}, value: {}'.format(row, entries))
            break

        collection = entries[0]
        for field in fields:
            extracted[field][row] = collection[field]

    return tuple(
        pd.Series(values, index=list(values), name='belongs_to_collection_' + field)
        for field, values in extracted.items()
    )


# Split the training data.
(id_train_se, name_train_se,
 poster_path_train_se, backdrop_path_train_se) = _split_collection(train)

# Split the test data.
(id_test_se, name_test_se,
 poster_path_test_se, backdrop_path_test_se) = _split_collection(test)

In [4]:
# Swap the raw stringified column for the extracted collection columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
train = pd.concat(
    [
        train.drop(columns='belongs_to_collection'),
        id_train_se,
        name_train_se,
        poster_path_train_se,
        backdrop_path_train_se,
    ],
    axis=1,
)
train.head(3)

Unnamed: 0,id,budget,genres,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,...,tagline,title,Keywords,cast,crew,revenue,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,1,14000000,"[{'id': 35, 'name': 'Comedy'}]",,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,/tQtWuwvMf0hCc2QR2tkolwl7c3c.jpg,...,The Laws of Space and Time are About to be Vio...,Hot Tub Time Machine 2,"[{'id': 4379, 'name': 'time travel'}, {'id': 9...","[{'cast_id': 4, 'character': 'Lou', 'credit_id...","[{'credit_id': '59ac067c92514107af02c8c8', 'de...",12314651,313576.0,Hot Tub Time Machine Collection,/iEhb00TGPucF0b4joM1ieyY026U.jpg,/noeTVcgpBiD48fDjFVic1Vz7ope.jpg
1,2,40000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,/w9Z7A0GHEhIp7etpj0vyKOeU1Wx.jpg,...,It can take a lifetime to find true love; she'...,The Princess Diaries 2: Royal Engagement,"[{'id': 2505, 'name': 'coronation'}, {'id': 42...","[{'cast_id': 1, 'character': 'Mia Thermopolis'...","[{'credit_id': '52fe43fe9251416c7502563d', 'de...",95149435,107674.0,The Princess Diaries Collection,/wt5AMbxPTS4Kfjx7Fgm149qPfZl.jpg,/zSEtYD77pKRJlUPx34BJgUG9v1c.jpg
2,3,3300000,"[{'id': 18, 'name': 'Drama'}]",http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,/lIv1QinFqz4dlp5U4lQ6HaiskOZ.jpg,...,The road to greatness can take you to the edge.,Whiplash,"[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...","[{'cast_id': 5, 'character': 'Andrew Neimann',...","[{'credit_id': '54d5356ec3a3683ba0000039', 'de...",13092000,,,,


In [5]:
# Swap the raw stringified column for the extracted collection columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
test = pd.concat(
    [
        test.drop(columns='belongs_to_collection'),
        id_test_se,
        name_test_se,
        poster_path_test_se,
        backdrop_path_test_se,
    ],
    axis=1,
)
test.head(3)

Unnamed: 0,id,budget,genres,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,...,status,tagline,title,Keywords,cast,crew,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,3001,0,"[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...",http://www.pokemon.com/us/movies/movie-pokemon...,tt1226251,ja,ディアルガVSパルキアVSダークライ,Ash and friends (this time accompanied by newc...,3.851534,/tnftmLMemPLduW6MRyZE0ZUD19z.jpg,...,Released,Somewhere Between Time & Space... A Legend Is ...,Pokémon: The Rise of Darkrai,"[{'id': 11451, 'name': 'pok√©mon'}, {'id': 115...","[{'cast_id': 3, 'character': 'Tonio', 'credit_...","[{'credit_id': '52fe44e7c3a368484e03d683', 'de...",34055.0,Pokémon Collection,/j5te0YNZAMXDBnsqTUDKIBEt8iu.jpg,/iGoYKA0TFfgSoZpG2u5viTJMGfK.jpg
1,3002,88000,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,tt0051380,en,Attack of the 50 Foot Woman,When an abused wife grows to giant size becaus...,3.559789,/9MgBNBqlH1sG4yG2u4XkwI5CoJa.jpg,...,Released,A titanic beauty spreads a macabre wave of hor...,Attack of the 50 Foot Woman,"[{'id': 9748, 'name': 'revenge'}, {'id': 9951,...","[{'cast_id': 2, 'character': 'Nancy Fowler Arc...","[{'credit_id': '55807805c3a3685b1300060b', 'de...",,,,
2,3003,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,tt0118556,en,Addicted to Love,Good-natured astronomer Sam is devastated when...,8.085194,/ed6nD7h9sbojSWY2qrnDcSvDFko.jpg,...,Released,A Comedy About Lost Loves And Last Laughs,Addicted to Love,"[{'id': 931, 'name': 'jealousy'}, {'id': 9673,...","[{'cast_id': 11, 'character': 'Maggie', 'credi...","[{'credit_id': '52fe4330c3a36847f8041367', 'de...",,,,


## genresをJson切り分け

In [6]:
import json
import ast

def _split_id_name(frame, column):
    """Split a stringified list-of-dicts column into ``_id`` and ``_name`` Series.

    Each non-null cell is parsed with ``ast.literal_eval`` into a list of
    dicts that carry ``id`` and ``name`` keys. The ids and the names of all
    entries are each joined with ``', '``.

    Ids are always rendered as strings, including for one-element lists, so
    the resulting column has a single value type instead of a mix of ``int``
    and ``str``. Empty lists are skipped and the row stays missing.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the stringified list-of-dicts column to split.

    Returns
    -------
    tuple of pandas.Series
        ``(ids, names)`` named ``<column>_id`` and ``<column>_name`` and
        indexed by the row labels of the cells that produced a value.
    """
    ids = {}
    names = {}

    # dropna() skips missing cells; items() yields (row label, raw text).
    for row, raw in frame[column].dropna().items():
        entries = ast.literal_eval(raw)
        if not entries:
            # Nothing to extract; indexing entries[0] here would raise IndexError.
            continue
        ids[row] = ', '.join(str(entry['id']) for entry in entries)
        names[row] = ', '.join(entry['name'] for entry in entries)

    return (
        pd.Series(ids, index=list(ids), name=column + '_id'),
        pd.Series(names, index=list(names), name=column + '_name'),
    )


# Split the training data.
id_train_se, name_train_se = _split_id_name(train, 'genres')

# Split the test data.
id_test_se, name_test_se = _split_id_name(test, 'genres')

In [7]:
# Swap the raw 'genres' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
train = pd.concat(
    [train.drop(columns='genres'), id_train_se, name_train_se],
    axis=1,
)
train.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,Keywords,cast,crew,revenue,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name
0,1,14000000,,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,/tQtWuwvMf0hCc2QR2tkolwl7c3c.jpg,"[{'name': 'Paramount Pictures', 'id': 4}, {'na...",...,"[{'id': 4379, 'name': 'time travel'}, {'id': 9...","[{'cast_id': 4, 'character': 'Lou', 'credit_id...","[{'credit_id': '59ac067c92514107af02c8c8', 'de...",12314651,313576.0,Hot Tub Time Machine Collection,/iEhb00TGPucF0b4joM1ieyY026U.jpg,/noeTVcgpBiD48fDjFVic1Vz7ope.jpg,35,Comedy
1,2,40000000,,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,/w9Z7A0GHEhIp7etpj0vyKOeU1Wx.jpg,"[{'name': 'Walt Disney Pictures', 'id': 2}]",...,"[{'id': 2505, 'name': 'coronation'}, {'id': 42...","[{'cast_id': 1, 'character': 'Mia Thermopolis'...","[{'credit_id': '52fe43fe9251416c7502563d', 'de...",95149435,107674.0,The Princess Diaries Collection,/wt5AMbxPTS4Kfjx7Fgm149qPfZl.jpg,/zSEtYD77pKRJlUPx34BJgUG9v1c.jpg,"35, 18, 10751, 10749","Comedy, Drama, Family, Romance"
2,3,3300000,http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,/lIv1QinFqz4dlp5U4lQ6HaiskOZ.jpg,"[{'name': 'Bold Films', 'id': 2266}, {'name': ...",...,"[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...","[{'cast_id': 5, 'character': 'Andrew Neimann',...","[{'credit_id': '54d5356ec3a3683ba0000039', 'de...",13092000,,,,,18,Drama


In [8]:
# Swap the raw 'genres' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
test = pd.concat(
    [test.drop(columns='genres'), id_test_se, name_test_se],
    axis=1,
)
test.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,title,Keywords,cast,crew,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name
0,3001,0,http://www.pokemon.com/us/movies/movie-pokemon...,tt1226251,ja,ディアルガVSパルキアVSダークライ,Ash and friends (this time accompanied by newc...,3.851534,/tnftmLMemPLduW6MRyZE0ZUD19z.jpg,,...,Pokémon: The Rise of Darkrai,"[{'id': 11451, 'name': 'pok√©mon'}, {'id': 115...","[{'cast_id': 3, 'character': 'Tonio', 'credit_...","[{'credit_id': '52fe44e7c3a368484e03d683', 'de...",34055.0,Pokémon Collection,/j5te0YNZAMXDBnsqTUDKIBEt8iu.jpg,/iGoYKA0TFfgSoZpG2u5viTJMGfK.jpg,"12, 16, 10751, 14","Adventure, Animation, Family, Fantasy"
1,3002,88000,,tt0051380,en,Attack of the 50 Foot Woman,When an abused wife grows to giant size becaus...,3.559789,/9MgBNBqlH1sG4yG2u4XkwI5CoJa.jpg,"[{'name': 'Woolner Brothers Pictures Inc.', 'i...",...,Attack of the 50 Foot Woman,"[{'id': 9748, 'name': 'revenge'}, {'id': 9951,...","[{'cast_id': 2, 'character': 'Nancy Fowler Arc...","[{'credit_id': '55807805c3a3685b1300060b', 'de...",,,,,"27, 878","Horror, Science Fiction"
2,3003,0,,tt0118556,en,Addicted to Love,Good-natured astronomer Sam is devastated when...,8.085194,/ed6nD7h9sbojSWY2qrnDcSvDFko.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",...,Addicted to Love,"[{'id': 931, 'name': 'jealousy'}, {'id': 9673,...","[{'cast_id': 11, 'character': 'Maggie', 'credi...","[{'credit_id': '52fe4330c3a36847f8041367', 'de...",,,,,"35, 10749","Comedy, Romance"


## KeywordsをJson切り分け

In [9]:
import json
import ast

def _split_id_name(frame, column):
    """Split a stringified list-of-dicts column into ``_id`` and ``_name`` Series.

    Each non-null cell is parsed with ``ast.literal_eval`` into a list of
    dicts that carry ``id`` and ``name`` keys. The ids and the names of all
    entries are each joined with ``', '``.

    Ids are always rendered as strings, including for one-element lists, so
    the resulting column has a single value type instead of a mix of ``int``
    and ``str``. Empty lists are skipped and the row stays missing.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the stringified list-of-dicts column to split.

    Returns
    -------
    tuple of pandas.Series
        ``(ids, names)`` named ``<column>_id`` and ``<column>_name`` and
        indexed by the row labels of the cells that produced a value.
    """
    ids = {}
    names = {}

    # dropna() skips missing cells; items() yields (row label, raw text).
    for row, raw in frame[column].dropna().items():
        entries = ast.literal_eval(raw)
        if not entries:
            # Nothing to extract; indexing entries[0] here would raise IndexError.
            continue
        ids[row] = ', '.join(str(entry['id']) for entry in entries)
        names[row] = ', '.join(entry['name'] for entry in entries)

    return (
        pd.Series(ids, index=list(ids), name=column + '_id'),
        pd.Series(names, index=list(names), name=column + '_name'),
    )


# Split the training data.
id_train_se, name_train_se = _split_id_name(train, 'Keywords')

# Split the test data.
id_test_se, name_test_se = _split_id_name(test, 'Keywords')

In [10]:
# Swap the raw 'Keywords' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
train = pd.concat(
    [train.drop(columns='Keywords'), id_train_se, name_train_se],
    axis=1,
)
train.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,crew,revenue,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name,Keywords_id,Keywords_name
0,1,14000000,,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,/tQtWuwvMf0hCc2QR2tkolwl7c3c.jpg,"[{'name': 'Paramount Pictures', 'id': 4}, {'na...",...,"[{'credit_id': '59ac067c92514107af02c8c8', 'de...",12314651,313576.0,Hot Tub Time Machine Collection,/iEhb00TGPucF0b4joM1ieyY026U.jpg,/noeTVcgpBiD48fDjFVic1Vz7ope.jpg,35,Comedy,"4379, 9663, 11830, 179431","time travel, sequel, hot tub, duringcreditssti..."
1,2,40000000,,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,/w9Z7A0GHEhIp7etpj0vyKOeU1Wx.jpg,"[{'name': 'Walt Disney Pictures', 'id': 2}]",...,"[{'credit_id': '52fe43fe9251416c7502563d', 'de...",95149435,107674.0,The Princess Diaries Collection,/wt5AMbxPTS4Kfjx7Fgm149qPfZl.jpg,/zSEtYD77pKRJlUPx34BJgUG9v1c.jpg,"35, 18, 10751, 10749","Comedy, Drama, Family, Romance","2505, 4263, 6038, 13072","coronation, duty, marriage, falling in love"
2,3,3300000,http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,/lIv1QinFqz4dlp5U4lQ6HaiskOZ.jpg,"[{'name': 'Bold Films', 'id': 2266}, {'name': ...",...,"[{'credit_id': '54d5356ec3a3683ba0000039', 'de...",13092000,,,,,18,Drama,"1416, 1523, 1640, 2176, 14512, 14819, 33896, 1...","jazz, obsession, conservatory, music teacher, ..."


In [11]:
# Swap the raw 'Keywords' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
test = pd.concat(
    [test.drop(columns='Keywords'), id_test_se, name_test_se],
    axis=1,
)
test.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,cast,crew,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name,Keywords_id,Keywords_name
0,3001,0,http://www.pokemon.com/us/movies/movie-pokemon...,tt1226251,ja,ディアルガVSパルキアVSダークライ,Ash and friends (this time accompanied by newc...,3.851534,/tnftmLMemPLduW6MRyZE0ZUD19z.jpg,,...,"[{'cast_id': 3, 'character': 'Tonio', 'credit_...","[{'credit_id': '52fe44e7c3a368484e03d683', 'de...",34055.0,Pokémon Collection,/j5te0YNZAMXDBnsqTUDKIBEt8iu.jpg,/iGoYKA0TFfgSoZpG2u5viTJMGfK.jpg,"12, 16, 10751, 14","Adventure, Animation, Family, Fantasy","11451, 11551","pok√©mon, pocket monsters"
1,3002,88000,,tt0051380,en,Attack of the 50 Foot Woman,When an abused wife grows to giant size becaus...,3.559789,/9MgBNBqlH1sG4yG2u4XkwI5CoJa.jpg,"[{'name': 'Woolner Brothers Pictures Inc.', 'i...",...,"[{'cast_id': 2, 'character': 'Nancy Fowler Arc...","[{'credit_id': '55807805c3a3685b1300060b', 'de...",,,,,"27, 878","Horror, Science Fiction","9748, 9951, 11034, 11513, 14819, 15184, 189099...","revenge, alien, b movie, cheating husband, vio..."
2,3003,0,,tt0118556,en,Addicted to Love,Good-natured astronomer Sam is devastated when...,8.085194,/ed6nD7h9sbojSWY2qrnDcSvDFko.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",...,"[{'cast_id': 11, 'character': 'Maggie', 'credi...","[{'credit_id': '52fe4330c3a36847f8041367', 'de...",,,,,"35, 10749","Comedy, Romance","931, 9673, 9748, 14602","jealousy, love, revenge, break-up"


## production_companiesをJson切り分け

In [12]:
import json
import ast

def _split_id_name(frame, column):
    """Split a stringified list-of-dicts column into ``_id`` and ``_name`` Series.

    Each non-null cell is parsed with ``ast.literal_eval`` into a list of
    dicts that carry ``id`` and ``name`` keys. The ids and the names of all
    entries are each joined with ``', '``.

    Ids are always rendered as strings, including for one-element lists, so
    the resulting column has a single value type instead of a mix of ``int``
    and ``str``. Empty lists are skipped and the row stays missing.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the stringified list-of-dicts column to split.

    Returns
    -------
    tuple of pandas.Series
        ``(ids, names)`` named ``<column>_id`` and ``<column>_name`` and
        indexed by the row labels of the cells that produced a value.
    """
    ids = {}
    names = {}

    # dropna() skips missing cells; items() yields (row label, raw text).
    for row, raw in frame[column].dropna().items():
        entries = ast.literal_eval(raw)
        if not entries:
            # Nothing to extract; indexing entries[0] here would raise IndexError.
            continue
        ids[row] = ', '.join(str(entry['id']) for entry in entries)
        names[row] = ', '.join(entry['name'] for entry in entries)

    return (
        pd.Series(ids, index=list(ids), name=column + '_id'),
        pd.Series(names, index=list(names), name=column + '_name'),
    )


# Split the training data.
id_train_se, name_train_se = _split_id_name(train, 'production_companies')

# Split the test data.
id_test_se, name_test_se = _split_id_name(test, 'production_companies')

In [13]:
# Swap the raw 'production_companies' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
train = pd.concat(
    [train.drop(columns='production_companies'), id_train_se, name_train_se],
    axis=1,
)
train.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_countries,...,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name,Keywords_id,Keywords_name,production_companies_id,production_companies_name
0,1,14000000,,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,/tQtWuwvMf0hCc2QR2tkolwl7c3c.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",...,313576.0,Hot Tub Time Machine Collection,/iEhb00TGPucF0b4joM1ieyY026U.jpg,/noeTVcgpBiD48fDjFVic1Vz7ope.jpg,35,Comedy,"4379, 9663, 11830, 179431","time travel, sequel, hot tub, duringcreditssti...","4, 60, 8411","Paramount Pictures, United Artists, Metro-Gold..."
1,2,40000000,,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,/w9Z7A0GHEhIp7etpj0vyKOeU1Wx.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",...,107674.0,The Princess Diaries Collection,/wt5AMbxPTS4Kfjx7Fgm149qPfZl.jpg,/zSEtYD77pKRJlUPx34BJgUG9v1c.jpg,"35, 18, 10751, 10749","Comedy, Drama, Family, Romance","2505, 4263, 6038, 13072","coronation, duty, marriage, falling in love",2,Walt Disney Pictures
2,3,3300000,http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,/lIv1QinFqz4dlp5U4lQ6HaiskOZ.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",...,,,,,18,Drama,"1416, 1523, 1640, 2176, 14512, 14819, 33896, 1...","jazz, obsession, conservatory, music teacher, ...","2266, 3172, 32157","Bold Films, Blumhouse Productions, Right of Wa..."


In [14]:
# Swap the raw 'production_companies' column for the extracted id/name columns;
# pd.concat(axis=1) lines the new Series up with the frame by row index.
test = pd.concat(
    [test.drop(columns='production_companies'), id_test_se, name_test_se],
    axis=1,
)
test.head(3)

Unnamed: 0,id,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,production_countries,...,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,genres_id,genres_name,Keywords_id,Keywords_name,production_companies_id,production_companies_name
0,3001,0,http://www.pokemon.com/us/movies/movie-pokemon...,tt1226251,ja,ディアルガVSパルキアVSダークライ,Ash and friends (this time accompanied by newc...,3.851534,/tnftmLMemPLduW6MRyZE0ZUD19z.jpg,"[{'iso_3166_1': 'JP', 'name': 'Japan'}, {'iso_...",...,34055.0,Pokémon Collection,/j5te0YNZAMXDBnsqTUDKIBEt8iu.jpg,/iGoYKA0TFfgSoZpG2u5viTJMGfK.jpg,"12, 16, 10751, 14","Adventure, Animation, Family, Fantasy","11451, 11551","pok√©mon, pocket monsters",,
1,3002,88000,,tt0051380,en,Attack of the 50 Foot Woman,When an abused wife grows to giant size becaus...,3.559789,/9MgBNBqlH1sG4yG2u4XkwI5CoJa.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",...,,,,,"27, 878","Horror, Science Fiction","9748, 9951, 11034, 11513, 14819, 15184, 189099...","revenge, alien, b movie, cheating husband, vio...",9233,Woolner Brothers Pictures Inc.
2,3003,0,,tt0118556,en,Addicted to Love,Good-natured astronomer Sam is devastated when...,8.085194,/ed6nD7h9sbojSWY2qrnDcSvDFko.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",...,,,,,"35, 10749","Comedy, Romance","931, 9673, 9748, 14602","jealousy, love, revenge, break-up","6194, 19507, 53009","Warner Bros., Outlaw Productions (I), Miramax"
