In [82]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

# Relative path to the rushing CSV (2019 data, judging by the file name).
df_path = '../data/combinedrushing2019.csv'
# index_col=0 uses the file's first column as the row index instead of a data column.
df = pd.read_csv(df_path, index_col=0)

In [60]:
# Preview the first five rows to sanity-check the load.
df.head()

Unnamed: 0,RushingPlayer,OffenseTeam,DefenseTeam,Yards,RushDirection,Distance,Formation,PlayType,IsTouchdown,Pos,...,RushingDefRank,TotalYardsAllowed,ForcedFumbles,TO,RushingFirstDownsAllowed,RushingTDAllowed,RushingAttAllowed,RushingYdsAllowed,YardsPerRushAllowed,YardsPerPlay
0,A.ARMAH,CAR,ARI,4,6.0,67,2,1,0,RB,...,13.0,6432.0,10.0,17.0,94.0,9.0,439.0,1922.0,4.4,6.0
1,A.DALTON,CIN,ARI,0,0.0,58,2,1,0,QB,...,13.0,6432.0,10.0,17.0,94.0,9.0,439.0,1922.0,4.4,6.0
2,B.SNELL,PIT,ARI,3,6.0,23,1,1,0,RB,...,13.0,6432.0,10.0,17.0,94.0,9.0,439.0,1922.0,4.4,6.0
3,B.SNELL,PIT,ARI,3,6.0,48,1,1,0,RB,...,13.0,6432.0,10.0,17.0,94.0,9.0,439.0,1922.0,4.4,6.0
4,B.SNELL,PIT,ARI,3,6.0,22,1,1,0,RB,...,13.0,6432.0,10.0,17.0,94.0,9.0,439.0,1922.0,4.4,6.0


In [61]:
# List every column name available in the loaded frame.
df.columns

Index(['RushingPlayer', 'OffenseTeam', 'DefenseTeam', 'Yards', 'RushDirection',
       'Distance', 'Formation', 'PlayType', 'IsTouchdown', 'Pos', 'G',
       'RushingAtt', 'RushingYds', 'RushingTD', 'FantasyPoints',
       'RushingYds/RushingAtt', 'RushingTD/RushingAtt', 'RushingDefRank',
       'TotalYardsAllowed', 'ForcedFumbles', 'TO', 'RushingFirstDownsAllowed',
       'RushingTDAllowed', 'RushingAttAllowed', 'RushingYdsAllowed',
       'YardsPerRushAllowed', 'YardsPerPlay'],
      dtype='object')

In [62]:
# Summarize the row count, per-column non-null counts, and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8343 entries, 0 to 8607
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   RushingPlayer             8343 non-null   object 
 1   OffenseTeam               8343 non-null   object 
 2   DefenseTeam               8343 non-null   object 
 3   Yards                     8343 non-null   int64  
 4   RushDirection             8343 non-null   float64
 5   Distance                  8343 non-null   int64  
 6   Formation                 8343 non-null   int64  
 7   PlayType                  8343 non-null   int64  
 8   IsTouchdown               8343 non-null   int64  
 9   Pos                       8343 non-null   object 
 10  G                         8343 non-null   float64
 11  RushingAtt                8343 non-null   float64
 12  RushingYds                8343 non-null   float64
 13  RushingTD                 8343 non-null   float64
 14  FantasyP


```python

df.sort_values(['b', 'c'], ascending=[True, False], inplace=True)

```

In [63]:
# Column names again, for reference when picking the sort and selection columns below.
df.columns

Index(['RushingPlayer', 'OffenseTeam', 'DefenseTeam', 'Yards', 'RushDirection',
       'Distance', 'Formation', 'PlayType', 'IsTouchdown', 'Pos', 'G',
       'RushingAtt', 'RushingYds', 'RushingTD', 'FantasyPoints',
       'RushingYds/RushingAtt', 'RushingTD/RushingAtt', 'RushingDefRank',
       'TotalYardsAllowed', 'ForcedFumbles', 'TO', 'RushingFirstDownsAllowed',
       'RushingTDAllowed', 'RushingAttAllowed', 'RushingYdsAllowed',
       'YardsPerRushAllowed', 'YardsPerPlay'],
      dtype='object')

In [64]:
# Order rows by player name, then by fantasy points (both ascending).
df = df.sort_values(by=['RushingPlayer', 'FantasyPoints'], ascending=True)

In [65]:
# Keep only the three columns used for the fantasy-points table.
df = df.loc[:, ['RushingPlayer', 'Pos', 'FantasyPoints']]

In [66]:
# Replace the row labels with a fresh 0..n-1 range; drop=True discards the old labels
# instead of keeping them as a column.
df = df.reset_index(drop=True)

In [67]:
# Preview the trimmed, re-indexed frame.
df.head()

Unnamed: 0,RushingPlayer,Pos,FantasyPoints
0,A.ABDULLAH,RB,39.3
1,A.ABDULLAH,RB,39.3
2,A.ABDULLAH,RB,39.3
3,A.ABDULLAH,RB,39.3
4,A.ABDULLAH,RB,39.3


In [68]:
# Flag every row whose RushingPlayer value already appeared in an earlier row
# (the first occurrence of each name is False).
df.duplicated(subset=['RushingPlayer'])

0       False
1        True
2        True
3        True
4        True
        ...  
8338     True
8339     True
8340     True
8341    False
8342    False
Length: 8343, dtype: bool

In [69]:
# Cast the player-name column to string type.
df['RushingPlayer'] = df['RushingPlayer'].astype('str') 

In [70]:
# Flag every row that has a later identical (player, position, points) row, so that
# exactly one row -- the last -- of each distinct combination stays unflagged.
# Keying on FantasyPoints alone would also flag, and the next cell would then discard,
# a player whose season total merely equals another player's.
df['bool'] = df.duplicated(subset=['RushingPlayer', 'Pos', 'FantasyPoints'], keep='last')

In [73]:
# Keep only the rows that were not flagged as duplicates.
df = df[~df['bool']]

In [74]:
# Inspect the de-duplicated rows (the helper 'bool' column is still present here).
df.head()

Unnamed: 0,RushingPlayer,Pos,FantasyPoints,bool
16,A.ABDULLAH,RB,39.3,False
23,A.ARMAH,RB,9.7,False
24,A.BECK,RB,24.54,False
27,A.BROWN,WR,217.1,False
35,A.DALTON,QB,199.66,False


In [75]:
# Drop the helper 'bool' column by re-selecting the three data columns.
df = df[['RushingPlayer', 'Pos', 'FantasyPoints']]

In [76]:
# Rank rows from highest to lowest fantasy points.
df = df.sort_values('FantasyPoints', ascending=False)


In [77]:
# Top five rows by fantasy points.
df.head()

Unnamed: 0,RushingPlayer,Pos,FantasyPoints
1689,C.MCCAFFREY,RB,469.2
6024,L.JACKSON,QB,415.68
3084,D.PRESCOTT,QB,335.78
7779,R.WILSON,QB,326.6
3370,D.WATSON,QB,318.98


In [78]:
# Shorten the name column's header from 'RushingPlayer' to 'Player'.
df = df.rename({'RushingPlayer': 'Player'}, axis='columns')

In [79]:
# Show the ranked table (pandas elides the middle rows of a long frame).
df

Unnamed: 0,Player,Pos,FantasyPoints
1689,C.MCCAFFREY,RB,469.20
6024,L.JACKSON,QB,415.68
3084,D.PRESCOTT,QB,335.78
7779,R.WILSON,QB,326.60
3370,D.WATSON,QB,318.98
...,...,...,...
2074,D.DAWKINS,RB,2.60
8317,W.GRIER,QB,1.32
5033,J.SCARLETT,RB,0.90
3252,D.TURNER,RB,0.60


In [43]:
# Reference (how to split a pandas DataFrame column): https://www.kite.com/python/answers/how-to-split-a-pandas-dataframe-column-in-python


In [80]:
# Show the table once more; the next cell writes it to CSV.
df

Unnamed: 0,Player,Pos,FantasyPoints
1689,C.MCCAFFREY,RB,469.20
6024,L.JACKSON,QB,415.68
3084,D.PRESCOTT,QB,335.78
7779,R.WILSON,QB,326.60
3370,D.WATSON,QB,318.98
...,...,...,...
2074,D.DAWKINS,RB,2.60
8317,W.GRIER,QB,1.32
5033,J.SCARLETT,RB,0.90
3252,D.TURNER,RB,0.60


In [81]:
# Write the ranked table to the current working directory; index=False leaves the
# row labels out of the file.
df.to_csv('2019fantasy_points.csv', index=False)

In [None]:
# FIXME: unfinished cell. Its original content was the bare expression `df.to`, which is
# not a DataFrame attribute and raises AttributeError under Restart & Run All.
# Finish the intended call (e.g. one of the DataFrame.to_* exporters) or delete this cell;
# the output shown beneath it is stale and was not produced by this code.

Unnamed: 0,RushingPlayer,FantasyPoints,Full Name
1689,C.MCCAFFREY,469.2,C.MCCAFFREY
6024,L.JACKSON,415.68,L.JACKSON
3084,D.PRESCOTT,335.78,D.PRESCOTT
7779,R.WILSON,326.6,R.WILSON
3370,D.WATSON,318.98,D.WATSON


## Next goal: compare 2019 with 2020, plot the two seasons with lines connecting them, and make the plot interactive (e.g. with D3).

* Load the 2020 data and join it to the 2019 table.
* Learn D3.

In [9]:
# Small toy frame for experimenting with DataFrame.duplicated():
# one (brand, style, rating) record per row.
duplicated_df = pd.DataFrame(
    [
        ('Yum Yum', 'cup', 4),
        ('Yum Yum', 'cup', 4),
        ('Indomie', 'cup', 3.5),
        ('Indomie', 'pack', 15),
        ('Indomie', 'pack', 5),
    ],
    columns=['brand', 'style', 'rating'],
)

In [10]:
# Preview the toy frame.
duplicated_df.head()

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


```python

duplicated_df.head()

duplicated_df.duplicated(subset=['style'])
# returns 0 False, 1 True, 2 True, 3 False, 4 True

duplicated_df['bool'] =  duplicated_df.duplicated(subset=['style'])

# next-line

duplicated_df = duplicated_df[duplicated_df['bool'] == True]

# next-line 2

```

In [26]:
#next-line
# Add the duplicate flag in this cell (it previously existed only in the markdown snippet
# above), so the displayed 'bool' column is reproducible on a fresh kernel.
# A row is True when its 'style' value already appeared in an earlier row.
duplicated_df['bool'] = duplicated_df.duplicated(subset=['style'])
duplicated_df

Unnamed: 0,brand,style,rating,bool
0,Yum Yum,cup,4.0,False
1,Yum Yum,cup,4.0,True
2,Indomie,cup,3.5,True
3,Indomie,pack,15.0,False
4,Indomie,pack,5.0,True


In [30]:
#next-line 2
# Compute the flag and apply the filter in this cell (both previously existed only in the
# markdown snippet above), so the displayed rows do not depend on hidden kernel state.
# Only rows whose 'style' repeats an earlier row are kept.
duplicated_df['bool'] = duplicated_df.duplicated(subset=['style'])
duplicated_df = duplicated_df[duplicated_df['bool']]
duplicated_df

Unnamed: 0,brand,style,rating,bool
1,Yum Yum,cup,4.0,True
2,Indomie,cup,3.5,True
4,Indomie,pack,5.0,True
