In [38]:
#************************************************************************
# Jesus Zamora
# ETL Project
# This code loads the latest player defense stats from the ESPN website
# by scraping the website, cleaning the data and loading into MongoDB
#*************************************************************************

import pandas as pd
import requests
import pymongo

In [39]:
# ESPN defense page
url = 'https://www.espn.com/nfl/stats/player/_/view/defense'

In [40]:
# Loads table data from ESPN website
tableDF = pd.read_html(url)
df= tableDF[0]

In [41]:
# The code takes in the team name as part of the player last name, this code
# extracts the team name from the last name
teams_list=[]
names_list=[]

for item in df.Name:
    if item[-3].isupper():
        teams_list.append(item[-3:])
        names_list.append(item[:-3])
    else:
        teams_list.append(item[-2:])
        names_list.append(item[:-2])
df.Name = names_list
df['Team Name']=teams_list

In [42]:
# Join both tables with player name data and player stat data
bothDF = tableDF[0].join(tableDF[1])

In [43]:
# In place rename the columns since we get multilevel columns when scraping the data
bothDF.rename(columns={bothDF.columns[2]: 'Team Name', bothDF.columns[3]: "POS", bothDF.columns[4]:"GP",
                        bothDF.columns[5]:"SOLO", bothDF.columns[6]:"AST", bothDF.columns[7]:"TOT",
                      bothDF.columns[8]:"SACK", bothDF.columns[9]:"YDS" ,
                      bothDF.columns[10]:"TFL",bothDF.columns[11]:"PD",
                      bothDF.columns[12]:"INT", bothDF.columns[13]:"YDS",
                      bothDF.columns[14]:"LNG", bothDF.columns[15]:"TD",
                      bothDF.columns[16]:"FF",bothDF.columns[17]:"FR",
                      bothDF.columns[18]:"FTD" }, inplace=True)


In [44]:
# Display table
bothDF

Unnamed: 0,RK,Name,Team Name,POS,GP,SOLO,AST,TOT,SACK,YDS,TFL,PD,INT,YDS.1,LNG,TD,FF,FR,FTD
0,1,Blake Martinez,GB,LB,10,63,39,102,1.0,6,5,0,0,0,0,0,1,0,0
1,2,Jordan Hicks,ARI,LB,10,57,41,98,0.5,2,8,5,2,16,14,0,2,0,0
2,3,Bobby Wagner,SEA,LB,10,50,47,97,2.0,15,6,3,0,0,0,0,1,1,0
3,4,Joe Schobert,CLE,LB,10,67,25,92,2.0,19,5,5,2,20,20,0,2,1,0
4,5,Budda Baker,ARI,S,10,59,26,85,0.0,0,4,6,0,0,0,0,1,1,0
5,6,K.J. Wright,SEA,LB,10,41,42,83,0.0,0,4,7,1,0,0,0,0,0,0
6,7,Luke Kuechly,CAR,LB,9,45,37,82,0.0,0,5,7,2,26,25,0,0,0,0
7,7,Rashaan Evans,TEN,LB,10,50,32,82,1.5,7,8,0,0,0,0,0,0,1,1
8,9,Jaylon Smith,DAL,LB,9,48,33,81,2.5,31,5,2,0,0,0,0,2,0,0
9,10,Eric Kendricks,MIN,LB,10,53,27,80,0.5,4,4,11,0,0,0,0,1,0,0


In [45]:
# Save table as a CSV
bothDF.to_csv('DefenseTable.csv')

In [46]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [47]:
# Define database and collection
db = client.NFL_StatsDB
collection = db.Defense

In [48]:
# Converting our data to a dictionary to load it into MonboDB
data1 = bothDF.to_dict(orient='records')

  


In [49]:
# Insert our data into
collection.insert_many(data1)

<pymongo.results.InsertManyResult at 0xedd10d0>