In [61]:
#************************************************************************
# Jesus Zamora
# ETL Project
# This code loads the latest player scoring stats from the ESPN website
# by scraping the website, cleaning the data and loading into MongoDB
#*************************************************************************

import pandas as pd
import requests
import pymongo

In [62]:
# Address of ESPN's NFL player scoring-stats page (the page scraped by pd.read_html)
url = "https://www.espn.com/nfl/stats/player/_/view/scoring"

In [63]:
# Loads table data from ESPN website.
# pd.read_html returns a list with one DataFrame per HTML table found at `url`.
tableDF = pd.read_html(url)
# df is the first scraped table; later code reads its Name column. It is the
# same object as tableDF[0] (indexing a list does not copy), so changes made
# through df are also visible through tableDF[0].
df= tableDF[0]

In [64]:
# The scraped Name cell has the team abbreviation glued onto the end of the
# player's name with no separator, so the abbreviation is peeled off the end
# of each value and stored in its own "Team Name" column.

# NFL teams whose ESPN abbreviation is two letters long. All other teams use
# three letters and none of those three-letter codes ends in one of these
# pairs, so testing the last two characters against this set is unambiguous.
# Extend the set if ESPN introduces another two-letter code.
TWO_LETTER_TEAMS = frozenset({'GB', 'KC', 'LV', 'NE', 'NO', 'SF', 'TB'})


def split_name_and_team(raw):
    """Split a scraped "<player name><TEAM>" string into its two parts.

    Parameters
    ----------
    raw : object
        One value from the scraped Name column, e.g. ``'Harrison ButkerKC'``.

    Returns
    -------
    tuple
        ``(player_name, team_abbreviation)``. Values that are not strings, or
        that are shorter than three characters, come back unchanged with
        ``None`` as the team instead of raising.

    Examples
    --------
    >>> split_name_and_team('Harrison ButkerKC')
    ('Harrison Butker', 'KC')
    >>> split_name_and_team('Zane GonzalezARI')
    ('Zane Gonzalez', 'ARI')
    >>> split_name_and_team('Mark Ingram IINO')
    ('Mark Ingram II', 'NO')
    """
    if not isinstance(raw, str) or len(raw) < 3:
        return raw, None
    if raw[-2:] in TWO_LETTER_TEAMS:
        # Checked first so a name ending in capitals (II, III, ...) cannot be
        # mistaken for the first letter of a three-letter abbreviation.
        width = 2
    elif raw[-3].isupper():
        # Third character from the end is a capital: three-letter abbreviation.
        width = 3
    else:
        width = 2
    return raw[:-width], raw[-width:]


# Only split while the column is still in its raw scraped form: running the
# split a second time would chop real letters off the already-clean names.
# df is modified in place on purpose, because the join that follows reads the
# cleaned columns through the same scraped table object.
if 'Team Name' not in df.columns:
    name_team_pairs = [split_name_and_team(raw) for raw in df.Name]
    names_list = [name for name, _ in name_team_pairs]
    teams_list = [team for _, team in name_team_pairs]
    df['Name'] = names_list
    df['Team Name'] = teams_list

In [65]:
# Join both tables with player name data and player stat data (row i of the
# first table lines up with row i of the second, so the default index join is
# used).
#
# The stats table is scraped with multilevel column headers while the name
# table has a single level. Older pandas only warned when joining frames whose
# column indexes have different depths; recent releases raise MergeError
# ("Not allowed to merge between different levels"). Flattening the stats
# headers first keeps every label as the same tuple, just stored in a
# one-level index, so the join is valid on every version. If the headers are
# already flat, to_flat_index() leaves them unchanged.
stats_table = tableDF[1].copy()  # copy so the scraped original is not modified
stats_table.columns = stats_table.columns.to_flat_index()
bothDF = tableDF[0].join(stats_table)

In [66]:
# The scraped stat columns arrive with multilevel labels, so columns 2 through
# 13 of the joined frame are renamed by position to flat, readable names.
# Columns 0 and 1 keep the labels they already have.
flat_labels = ['Team Name', 'POS', 'GP', 'RUSH', 'REC', 'RET',
               'TD', 'FG', 'XP', '2PT', 'PTS', 'PTS/G']

# Map "current label at position N" -> "new label", starting at position 2.
column_renames = {
    bothDF.columns[position]: label
    for position, label in enumerate(flat_labels, start=2)
}

# Rename on the existing frame itself (no new object is created).
bothDF.rename(columns=column_renames, inplace=True)


In [67]:
# Display the joined table to check the renamed columns
bothDF

Unnamed: 0,RK,Name,Team Name,POS,GP,RUSH,REC,RET,TD,FG,XP,2PT,PTS,PTS/G
0,1,Harrison Butker,KC,PK,10,0,0,0,0,23,29,0,98,9.8
1,2,Zane Gonzalez,ARI,PK,10,0,0,0,0,24,18,0,90,9.0
2,3,Christian McCaffrey,CAR,RB,9,11,3,0,14,0,0,1,86,9.6
3,4,Aaron Jones,GB,RB,10,11,3,0,14,0,0,0,84,8.4
4,5,Justin Tucker,BAL,PK,9,0,0,0,0,17,31,0,82,9.1
5,6,Matt Gay,TB,PK,9,0,0,0,0,19,23,0,80,8.9
6,7,Josh Lambo,JAX,PK,9,0,0,0,0,22,12,0,78,8.7
7,8,Wil Lutz,NO,PK,9,0,0,0,0,19,19,0,76,8.4
8,9,Greg Zuerlein,LAR,PK,9,0,0,0,0,17,23,0,74,8.2
9,9,Chris Boswell,PIT,PK,10,0,0,0,0,18,20,0,74,7.4


In [68]:
# Keep a CSV copy of the table for possible later use.
# The file is written relative to the notebook's working directory.
csv_filename = 'ScoringTable.csv'
bothDF.to_csv(csv_filename)

In [69]:
# Set up the PyMongo client used to talk to MongoDB.
# The connection string targets a server on this machine, port 27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [70]:
# Select the database and collection that receive the scoring data.
# Dictionary-style access is equivalent to the attribute form
# (client.NFL_StatsDB / db.Scoring).
db = client['NFL_StatsDB']
collection = db['Scoring']

In [71]:
# Convert the table into a list of dictionaries, one per row
# (column name -> value), which is the shape needed to load it into MongoDB.
data1 = bothDF.to_dict('records')

In [72]:
# Insert our data into the Scoring collection, one document per table row.
# NOTE(review): nothing visible here clears the collection first, so repeated
# full runs of the notebook will presumably accumulate duplicate documents.
# Confirm whether that is intended.
insert_result = collection.insert_many(data1)
insert_result

<pymongo.results.InsertManyResult at 0x3d525a8>