In [1]:
import json
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import json_shot_scraper
from dataframe_cleaner import (pass_to_shot, corner_to_shot, transpose_coordinates, coord_to_yards, 
                               shot_distance_angle, dummy_columns, drop_own_goals, goal_dummy)
from json_shot_scraper import flatten_shot, flatten_goal, flatten_complete_pass, flatten_incomplete_pass, flatten_corner
from model_prep import create_hypothetical_df
from  player_scraper import flatten_player, flatten_sub

In [2]:
# Show up to 50 columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = 50

In [3]:
from html_scraper import db
from mongo_to_db import game_to_cleaned_df, create_frame, create_master_df, create_master_player_min_df
import string

In [4]:
# Query the `games` collection with no filter passed to find().
# NOTE(review): presumably `db` is a PyMongo database handle exported by
# html_scraper, which would make `games` a lazy cursor — confirm.
games = db.games.find()

In [5]:
# Take the first game returned by the query as a sample document for
# exploring the schema in the cells that follow.
test_game = games[0]

In [7]:
# List the top-level fields of the sample game document.
test_game.keys()

dict_keys(['_id', 'Revision', 'lastChangedDate', 'lastChangeBy', 'match', 'status', 'venueInformation', 'broadcast', 'scoreStatus', 'players', 'teams', 'officials', 'incidences', 'deletedIncidences', 'summary', 'commentary'])

In [9]:
# List the keys of the 'teams' sub-document (one entry per team in the game;
# the keys appear to be team ids — compare with 'team_id' in the flattened records).
test_game['teams'].keys()

dict_keys(['20', '13'])

In [10]:
# Inspect the raw record stored under key '20' to see which team fields exist.
test_game['teams']['20']

{'name': 'Vélez',
 'shortName': 'Vélez',
 'initials': 'VEL',
 'country': 'Argentina',
 'countryId': 1,
 'teamType': 'club',
 'colors': {'color': '#ffffff',
  'base_color': '#ffffff',
  'text': '#004f87',
  'path1': 'vshapeL',
  'path1_color': '#004f87',
  'path2': 'vshapeR',
  'path2_color': '#004f87',
  'shirt': 1},
 'social': {'página web': {'user': 'http://www.velezsarsfield.com.ar'},
  'instagram': {'user': 'velez'},
  'facebook': {'user': 'velezsarsfield'},
  'twitter': {'user': 'Velez'}}}

In [11]:
# Materialise the sample game's teams as a list of (key, team_record) tuples.
teams = [(team_key, team_info) for team_key, team_info in test_game['teams'].items()]

In [12]:
# Display the (key, team_record) pairs built from the sample game.
teams

[('20',
  {'name': 'Vélez',
   'shortName': 'Vélez',
   'initials': 'VEL',
   'country': 'Argentina',
   'countryId': 1,
   'teamType': 'club',
   'colors': {'color': '#ffffff',
    'base_color': '#ffffff',
    'text': '#004f87',
    'path1': 'vshapeL',
    'path1_color': '#004f87',
    'path2': 'vshapeR',
    'path2_color': '#004f87',
    'shirt': 1},
   'social': {'página web': {'user': 'http://www.velezsarsfield.com.ar'},
    'instagram': {'user': 'velez'},
    'facebook': {'user': 'velezsarsfield'},
    'twitter': {'user': 'Velez'}}}),
 ('13',
  {'name': 'Newell`s',
   'shortName': 'Newell`s',
   'initials': 'NOB',
   'country': 'Argentina',
   'countryId': 1,
   'teamType': 'club',
   'colors': {'color': '#000000',
    'base_color': '#000000',
    'text': '#ffffff',
    'path1': 'half',
    'path1_color': '#ff0000',
    'path2': '',
    'path2_color': '',
    'shirt': 1},
   'social': {'página web': {'user': 'http://www.newellsoldboys.com.ar/'},
    'instagram': {'user': 'canobofi

In [28]:
from json_shot_scraper import flatten_team, team_dicts

In [17]:
# Flatten every (key, team_record) pair of the sample game with flatten_team.
# NOTE(review): this rebinds `team_dicts`, which this notebook also imports as
# a function from json_shot_scraper; after this cell the name refers to a list.
team_dicts = list(map(flatten_team, teams))

In [18]:
# Display the flattened team records for the sample game.
team_dicts

[{'team_id': '20', 'team_intitials': 'VEL', 'short_name': 'Vélez'},
 {'team_id': '13', 'team_intitials': 'NOB', 'short_name': 'Newell`s'}]

In [29]:
# Re-run the unfiltered query on the `games` collection and rebind `games`
# to the new result.
# NOTE(review): presumably needed because a query cursor can only be iterated
# once — confirm against the driver `db` comes from.
games = db.games.find()

In [21]:
# Collect one flattened record per distinct team across every stored game.
#
# A fresh query is issued here instead of iterating the shared `games` variable:
# assuming `db` is a PyMongo database, a cursor yields nothing once it has been
# iterated, so looping over `games` made this cell return [] when re-run and
# left `games` unusable for later cells.
#
# The loop uses its own variable names so it does not overwrite the `teams`
# list built from the sample game earlier in the notebook.
#
# NOTE(review): `team_dicts` is also imported as a function from
# json_shot_scraper in this notebook; this cell rebinds the name to a list.
team_dicts = []
for game in db.games.find():
    for team_item in game['teams'].items():
        team_record = flatten_team(team_item)
        # Dict equality keeps the first occurrence of each distinct record.
        if team_record not in team_dicts:
            team_dicts.append(team_record)

In [24]:
# Display the de-duplicated team records gathered from all games.
team_dicts

[{'team_id': '20', 'team_intitials': 'VEL', 'short_name': 'Vélez'},
 {'team_id': '13', 'team_intitials': 'NOB', 'short_name': 'Newell`s'},
 {'team_id': '136', 'team_intitials': 'TIG', 'short_name': 'Tigre'},
 {'team_id': '19', 'team_intitials': 'SLO', 'short_name': 'San Lorenzo'},
 {'team_id': '8', 'team_intitials': 'GIM', 'short_name': 'Gimnasia'},
 {'team_id': '2', 'team_intitials': 'ARG', 'short_name': 'Argentinos'},
 {'team_id': '137', 'team_intitials': 'UNI', 'short_name': 'Unión'},
 {'team_id': '122', 'team_intitials': 'ALD', 'short_name': 'Aldosivi'},
 {'team_id': '869', 'team_intitials': 'PA', 'short_name': 'Patronato'},
 {'team_id': '6', 'team_intitials': 'COL', 'short_name': 'Colón'},
 {'team_id': '124', 'team_intitials': 'BEL', 'short_name': 'Belgrano'},
 {'team_id': '134', 'team_intitials': 'SMS', 'short_name': 'S. Martín SJ'},
 {'team_id': '5', 'team_intitials': 'BOC', 'short_name': 'Boca'},
 {'team_id': '135', 'team_intitials': 'TAL', 'short_name': 'Talleres'},
 {'team_id

In [31]:
# Build the team list with the `team_dicts` helper from json_shot_scraper.
#
# The helper is called through its module rather than by its bare name because
# this notebook rebinds `team_dicts` to a list (in this cell and in earlier
# ones). With the bare name, the call fails with "'list' object is not callable"
# on a top-to-bottom run or whenever this cell is re-run.
#
# A fresh query is passed in; assuming `db` is a PyMongo database, a cursor
# that has already been looped over would yield no games.
team_dicts = json_shot_scraper.team_dicts(db.games.find())

In [32]:
# Display the team records returned by the json_shot_scraper helper.
team_dicts

[{'team_id': '20', 'team_intitials': 'VEL', 'short_name': 'Vélez'},
 {'team_id': '13', 'team_intitials': 'NOB', 'short_name': 'Newell`s'},
 {'team_id': '136', 'team_intitials': 'TIG', 'short_name': 'Tigre'},
 {'team_id': '19', 'team_intitials': 'SLO', 'short_name': 'San Lorenzo'},
 {'team_id': '8', 'team_intitials': 'GIM', 'short_name': 'Gimnasia'},
 {'team_id': '2', 'team_intitials': 'ARG', 'short_name': 'Argentinos'},
 {'team_id': '137', 'team_intitials': 'UNI', 'short_name': 'Unión'},
 {'team_id': '122', 'team_intitials': 'ALD', 'short_name': 'Aldosivi'},
 {'team_id': '869', 'team_intitials': 'PA', 'short_name': 'Patronato'},
 {'team_id': '6', 'team_intitials': 'COL', 'short_name': 'Colón'},
 {'team_id': '124', 'team_intitials': 'BEL', 'short_name': 'Belgrano'},
 {'team_id': '134', 'team_intitials': 'SMS', 'short_name': 'S. Martín SJ'},
 {'team_id': '5', 'team_intitials': 'BOC', 'short_name': 'Boca'},
 {'team_id': '135', 'team_intitials': 'TAL', 'short_name': 'Talleres'},
 {'team_id