## Import Libraries & Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from scipy.stats import binom_test

pd.set_option('display.max_rows',500)

In [None]:
# Read the hand-recorded pack-opening log and preview its first 15 rows.
ratings_csv = "fut_ratings.csv"
df = pd.read_csv(ratings_csv)
df.head(n=15)

In [None]:
df.columns

## Clean Data - 'Event'

In [None]:
df['Event'].unique()

# Double check that 'Event' column is only numerical values 1-110 (AKA the # of events in data, which is 110)

In [None]:
df['trsh/door/brd/WO'].unique()

# Check if 'trsh/door/brd/WO' column has any typos. Should only be 4 values.
# This column should display "trash, door, board, WO". If values don't match up, change via Find and Replace
# Change after dropping blank rows

## Clean Data - Drop Blank Rows

In [None]:
df.dropna()

In [None]:
# newdf with blank rows dropped
data = df.dropna()

In [None]:
data.reset_index(drop=True, inplace=True)

## Clean Data - 'trsh/door/brd/WO'

In [None]:
data['trsh/door/brd/WO'].unique()

In [None]:
# Change to "trash, door, board, WO" by find and replace.

# Every misspelling / casing variant seen in the column, keyed to its canonical value.
location_aliases = {
    'doors': 'door',
    'Board': 'board',
    'boards': 'board',
    'boad': 'board',
    'walkout': 'WO',
    'Walkout': 'WO',
}

# One exact-match pass, limited to the location column so that no other
# column can be rewritten by accident.
data.replace({'trsh/door/brd/WO': location_aliases}, inplace=True)
data.head(30)

## Clean Data - 'Pack Types'

In [None]:
data['Pack Type'].unique()

In [None]:
# We see duplicate pack types from: misspellings, upper/lower case issues, spacing issues
# So we change the duplicates/problem packs to one common name for each different pack types

In [None]:
# Canonical (lower-case) pack name for every spelling, casing and spacing
# variant found in the raw log. Matching is exact, so a short key such as
# 'Gold Pack' never rewrites part of a longer pack name.
pack_type_aliases = {
    'Small Prime Electrum Players Pack': 'small prime electrum players pack',
    'Small Prime Electurm Players Pack': 'small prime electrum players pack',
    'Jumbo Premium Gold Players Pack': 'jumbo premium gold players pack',
    'Small Prime Mixed Players Pack': 'small prime mixed players pack',
    'Small Prime Gold Players pack': 'small prime gold players pack',
    'Jumbo Premium Gold Players': 'jumbo premium gold players pack',
    'Jumbo premoium gold players': 'jumbo premium gold players pack',
    'Jumbo Rare Gold Players Pack': 'jumbo rare gold players pack',
    'Small Prime Gold Player Pack': 'small prime gold players pack',
    'Small Rare Gold Players Pack': 'small rare gold players pack',
    'Small Rare gold players pack': 'small rare gold players pack',
    'Small Rare Mixed Players Pack': 'small rare mixed players pack',
    'Small Rare mixed Players Pack': 'small rare mixed players pack',
    'Premium Electrum Players Pack': 'premium electrum players pack',
    'Gold Premium Players Pack': 'premium gold players pack',
    'Jumbo Premium Gold 26 Pack': 'jumbo premium gold 26 pack',
    'Jumbo Premium Gold Pack': 'jumbo premium gold pack',
    'Jumbo Rare Player Pack': 'jumbo rare players pack',
    'Jumbo Rare Players Pack': 'jumbo rare players pack',
    'One Rare Player Pack': 'one rare player pack',
    'PL Premium Players Pack': 'pl premium players pack',
    'Premium Gold Jumbo Pack': 'jumbo premium gold pack',
    'Premium Gold Players Pack': 'premium gold players pack',
    'Premium Jumbo Gold 26 Pack': 'jumbo premium gold 26 pack',
    'Premium Jumbo Gold Pack': 'jumbo premium gold pack',
    'Premium Mixed Players Pack': 'premium mixed players pack',
    'Prime Electrum Players Pack': 'prime electrum players pack',
    'Prime electrum players pack': 'prime electrum players pack',
    'Prime Gold Players Pack': 'prime gold players pack',
    'Prime Mixed Players Pack': 'prime mixed players pack',
    'Prime Silver Players Pack': 'prime silver players pack',
    'Rare Electrum Players Pack': 'rare electrum players pack',
    'Rare Mixed Players Pack': 'rare mixed players pack',
    'Rare mixed players pack': 'rare mixed players pack',
    'Rare Mixed Players Pack Pack': 'rare mixed players pack',
    'Small Gold Players Pack': 'small gold players pack',
    'Small Gold Players pack': 'small gold players pack',
    'Two Rare Gold Player Pack': 'two rare gold players pack',
    'Two Rare Gold Players Pack': 'two rare gold players pack',
    'Two UCL Common Player Pack': 'two ucl common players pack',
    'UCL Rare Player Pack': 'ucl rare player pack',
    'UCL Three Player Pack': 'ucl three player pack',
    'Electrum Players Pack': 'electrum players pack',
    'Mixed Players Pack': 'mixed players pack',
    'Rare Players Pack': 'rare players pack',
    'Jumbo Gold Pack': 'jumbo gold pack',
    'Rare Gold Pack': 'rare gold pack',
    'Rare Mega Pack': 'rare mega pack',
    'Rare Mega Pack ': 'rare mega pack',  # trailing-space variant
    'Ultimate Pack': 'ultimate pack',
    'Two player pack': 'two players pack',
    'Two Player Pack': 'two players pack',
    'Two Players Pack': 'two players pack',
    'Gold 13 Pack': 'gold 13 pack',
    'Gold 26 Pack': 'gold 26 pack',
    'Gold Players Pack': 'gold players pack',
    'Premium Gold Pack': 'premium gold pack',
    'Mega Pack': 'mega pack',
    'Gold Pack': 'gold pack',
}

# Single pass over the 'Pack Type' column only (previously one whole-frame
# scan per variant). No key is also a target, so applying the mapping at once
# gives the same result as applying the replacements one after another.
data.replace({'Pack Type': pack_type_aliases}, inplace=True)

data.head(300)

In [None]:
data['Pack Type'].unique()

In [None]:
# Remove, in place, the columns that the probability analysis does not read.
unused_columns = ['Event', 'trsh/door/brd/WO', 'Date']
data.drop(columns=unused_columns, inplace=True)

# Drop columns; the find-and-replace code above is left in place as a reference for later analysis.

In [None]:
data.info()

In [None]:
# Bucket the cleaned rows by pack type; head() previews the first rows of each bucket.
grouped = data.groupby(by=['Pack Type'])
grouped.head()

In [None]:
# Print each pack type's rows under a heading that names the pack
# (the heading used to be the fixed text 'Pack Type' for every group).
for pack, group in grouped:
    # Grouping by a one-element list can yield 1-tuple keys on newer pandas.
    pack_name = pack[0] if isinstance(pack, tuple) else pack
    print(f'Pack Type: {pack_name}')
    print(group)
    print()

In [None]:
print(grouped.groups)

# Verify values + index are the same using eye test

In [None]:
print(len(grouped.groups))

In [None]:
# need avg top rating by probability

In [None]:
# Average of every numeric column per pack type. numeric_only=True keeps this
# working on pandas 2.x, where a remaining text column would otherwise raise.
grouped.mean(numeric_only=True)

In [None]:
print(data['Pack Type'].nunique())

In [None]:
data['Pack Type'].value_counts()

In [None]:
grouped.describe()

In [None]:
# Drop pack if under 10 entries
# small electrum players pack, gold 13 pack, gold 26 pack, jumbo premium gold 26 pack, jumbo rare gold players pack,
# one rare player pack, pl premium players pack, prime silver players pack, small prime gold players pack
# small prime mixed players pack, two players pack, two ucl common players pack, ucl three player pack, ultimate pack

In [None]:
# data.loc[76,'Top Rating']

# This proves there is a top rating value for small electrum players pack, but nothing shows up.

### Finding True Pack Probabilities

## Packs to investigate:
##### 1: gold pack
##### 2: premium gold pack
##### 3: premium gold players pack
##### 4: jumbo premium gold pack
##### 5: prime gold players pack
##### 6: rare gold pack
##### 7: mega pack
##### 8: rare players pack
##### 9: premium electrum players pack

## 1: Gold Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 7.70%
#### Gold 84+ : 3.40%

In [None]:
gold_pack_75 = data.loc[((data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 75))]

In [None]:
gold_pack_82 = data.loc[((data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 82))]

In [None]:
gold_pack_84 = data.loc[((data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 84))]

In [None]:
print(f'75+ Count : {len(gold_pack_75)}')
print(f'82+ Count : {len(gold_pack_82)}')
print(f'84+ Count : {len(gold_pack_84)}')

In [None]:
gold_pack_75_num = len(data[(data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 75)])
gold_pack_82_num = len(data[(data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 82)])
gold_pack_84_num = len(data[(data['Pack Type'] == 'gold pack') & (data['Top Rating'] >= 84)])

print(gold_pack_75_num)
print(gold_pack_82_num)
print(gold_pack_84_num)

In [None]:
gold_pack_denom = len(data[(data['Pack Type'] == 'gold pack')])
gold_pack_denom

In [None]:
gold_pack_75_percent = gold_pack_75_num/gold_pack_denom
gold_pack_82_percent = gold_pack_82_num/gold_pack_denom
gold_pack_84_percent = gold_pack_84_num/gold_pack_denom


print(gold_pack_75_percent)
print(gold_pack_82_percent)
print(gold_pack_84_percent)

In [None]:
gold_pack_82

#weird that I never got an 83

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'gold pack')]['Top Rating']);

## 2: Premium Gold Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 19.00%
#### Gold 84+ : 4.70%

In [None]:
premium_gold_pack_75 = data.loc[((data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 75))]
premium_gold_pack_82 = data.loc[((data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 82))]
premium_gold_pack_84 = data.loc[((data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 84))]

In [None]:
print(f'75+ Count : {len(premium_gold_pack_75)}')
print(f'82+ Count : {len(premium_gold_pack_82)}')
print(f'84+ Count : {len(premium_gold_pack_84)}')

In [None]:
premium_gold_pack_75_num = len(data[(data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 75)])
premium_gold_pack_82_num = len(data[(data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 82)])
premium_gold_pack_84_num = len(data[(data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 84)])

print(premium_gold_pack_75_num)
print(premium_gold_pack_82_num)
print(premium_gold_pack_84_num)

#data[(data['Pack Type'] == 'premium gold pack') & (data['Top Rating'] >= 82)]

In [None]:
premium_gold_pack_denom = len(data[(data['Pack Type'] == 'premium gold pack')])
premium_gold_pack_denom

In [None]:
premium_gold_pack_75_percent = premium_gold_pack_75_num/premium_gold_pack_denom
premium_gold_pack_82_percent = premium_gold_pack_82_num/premium_gold_pack_denom
premium_gold_pack_84_percent = premium_gold_pack_84_num/premium_gold_pack_denom


print(premium_gold_pack_75_percent)
print(premium_gold_pack_82_percent)
print(premium_gold_pack_84_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'premium gold pack')]['Top Rating']);

## 3: Premium Gold Players Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 59.00%
#### Gold 86+ : 3.60%

In [None]:
premium_gold_players_pack_75 = data.loc[((data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 75))]
premium_gold_players_pack_82 = data.loc[((data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 82))]
premium_gold_players_pack_86 = data.loc[((data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 86))]

In [None]:
print(f'75+ Count : {len(premium_gold_players_pack_75)}')
print(f'82+ Count : {len(premium_gold_players_pack_82)}')
print(f'86+ Count : {len(premium_gold_players_pack_86)}')

In [None]:
premium_gold_players_pack_75_num = len(data[(data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 75)])
premium_gold_players_pack_82_num = len(data[(data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 82)])
premium_gold_players_pack_86_num = len(data[(data['Pack Type'] == 'premium gold players pack') & (data['Top Rating'] >= 86)])

print(premium_gold_players_pack_75_num)
print(premium_gold_players_pack_82_num)
print(premium_gold_players_pack_86_num)

In [None]:
premium_gold_players_pack_denom = len(data[(data['Pack Type'] == 'premium gold players pack')])
premium_gold_players_pack_denom

In [None]:
premium_gold_players_pack_75_percent = premium_gold_players_pack_75_num/premium_gold_players_pack_denom
premium_gold_players_pack_82_percent = premium_gold_players_pack_82_num/premium_gold_players_pack_denom
premium_gold_players_pack_86_percent = premium_gold_players_pack_86_num/premium_gold_players_pack_denom


print(premium_gold_players_pack_75_percent)
print(premium_gold_players_pack_82_percent)
print(premium_gold_players_pack_86_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'premium gold players pack')]['Top Rating']);

## 4: Jumbo Premium Gold Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 42.00%
#### Gold 85+ : 4.60%

In [None]:
jumbo_premium_gold_pack_75 = data.loc[((data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 75))]
jumbo_premium_gold_pack_82 = data.loc[((data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 82))]
jumbo_premium_gold_pack_85 = data.loc[((data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 85))]

In [None]:
print(f'75+ Count : {len(jumbo_premium_gold_pack_75)}')
print(f'82+ Count : {len(jumbo_premium_gold_pack_82)}')
print(f'85+ Count : {len(jumbo_premium_gold_pack_85)}')

In [None]:
jumbo_premium_gold_pack_75_num = len(data[(data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 75)])
jumbo_premium_gold_pack_82_num = len(data[(data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 82)])
jumbo_premium_gold_pack_85_num = len(data[(data['Pack Type'] == 'jumbo premium gold pack') & (data['Top Rating'] >= 85)])

print(jumbo_premium_gold_pack_75_num)
print(jumbo_premium_gold_pack_82_num)
print(jumbo_premium_gold_pack_85_num)

In [None]:
jumbo_premium_gold_pack_denom = len(data[(data['Pack Type'] == 'jumbo premium gold pack')])
jumbo_premium_gold_pack_denom

In [None]:
jumbo_premium_gold_pack_75_percent = jumbo_premium_gold_pack_75_num/jumbo_premium_gold_pack_denom
jumbo_premium_gold_pack_82_percent = jumbo_premium_gold_pack_82_num/jumbo_premium_gold_pack_denom
jumbo_premium_gold_pack_85_percent = jumbo_premium_gold_pack_85_num/jumbo_premium_gold_pack_denom


print(jumbo_premium_gold_pack_75_percent)
print(jumbo_premium_gold_pack_82_percent)
print(jumbo_premium_gold_pack_85_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'jumbo premium gold pack')]['Top Rating']);

## 5: Prime Gold Players Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 80.00%
#### Gold 87+ : 4.20%

In [None]:
prime_gold_players_pack_75 = data.loc[((data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 75))]
prime_gold_players_pack_82 = data.loc[((data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 82))]
prime_gold_players_pack_87 = data.loc[((data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 87))]

In [None]:
print(f'75+ Count : {len(prime_gold_players_pack_75)}')
print(f'82+ Count : {len(prime_gold_players_pack_82)}')
print(f'87+ Count : {len(prime_gold_players_pack_87)}')

In [None]:
prime_gold_players_pack_75_num = len(data[(data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 75)])
prime_gold_players_pack_82_num = len(data[(data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 82)])
prime_gold_players_pack_87_num = len(data[(data['Pack Type'] == 'prime gold players pack') & (data['Top Rating'] >= 87)])

print(prime_gold_players_pack_75_num)
print(prime_gold_players_pack_82_num)
print(prime_gold_players_pack_87_num)

In [None]:
prime_gold_players_pack_denom = len(data[(data['Pack Type'] == 'prime gold players pack')])
prime_gold_players_pack_denom

In [None]:
prime_gold_players_pack_75_percent = prime_gold_players_pack_75_num/prime_gold_players_pack_denom
prime_gold_players_pack_82_percent = prime_gold_players_pack_82_num/prime_gold_players_pack_denom
prime_gold_players_pack_84_percent = prime_gold_players_pack_87_num/prime_gold_players_pack_denom


print(prime_gold_players_pack_75_percent)
print(prime_gold_players_pack_82_percent)
print(prime_gold_players_pack_84_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'prime gold players pack')]['Top Rating']);

## 6: Rare Gold Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 55.00%
#### Gold 86+ : 4.30%

In [None]:
rare_gold_pack_75 = data.loc[((data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 75))]
rare_gold_pack_82 = data.loc[((data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 82))]
rare_gold_pack_86 = data.loc[((data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 86))]

In [None]:
print(f'75+ Count : {len(rare_gold_pack_75)}')
print(f'82+ Count : {len(rare_gold_pack_82)}')
print(f'86+ Count : {len(rare_gold_pack_86)}')

In [None]:
rare_gold_pack_75_num = len(data[(data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 75)])
rare_gold_pack_82_num = len(data[(data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 82)])
rare_gold_pack_86_num = len(data[(data['Pack Type'] == 'rare gold pack') & (data['Top Rating'] >= 86)])

print(rare_gold_pack_75_num)
print(rare_gold_pack_82_num)
print(rare_gold_pack_86_num)

In [None]:
rare_gold_pack_denom = len(data[(data['Pack Type'] == 'rare gold pack')])
rare_gold_pack_denom

In [None]:
rare_gold_pack_75_percent = rare_gold_pack_75_num/rare_gold_pack_denom
rare_gold_pack_82_percent = rare_gold_pack_82_num/rare_gold_pack_denom
rare_gold_pack_86_percent = rare_gold_pack_86_num/rare_gold_pack_denom


print(rare_gold_pack_75_percent)
print(rare_gold_pack_82_percent)
print(rare_gold_pack_86_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'rare gold pack')]['Top Rating']);

## 7: Mega Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 79.00%
#### Gold 87+ : 4.50%

In [None]:
mega_pack_75 = data.loc[((data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 75))]
mega_pack_82 = data.loc[((data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 82))]
mega_pack_87 = data.loc[((data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 87))]

In [None]:
print(f'75+ Count : {len(mega_pack_75)}')
print(f'82+ Count : {len(mega_pack_82)}')
print(f'87+ Count : {len(mega_pack_87)}')

In [None]:
mega_pack_75_num = len(data[(data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 75)])
mega_pack_82_num = len(data[(data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 82)])
mega_pack_87_num = len(data[(data['Pack Type'] == 'mega pack') & (data['Top Rating'] >= 87)])

print(mega_pack_75_num)
print(mega_pack_82_num)
print(mega_pack_87_num)

In [None]:
mega_pack_denom = len(data[(data['Pack Type'] == 'mega pack')])
mega_pack_denom

In [None]:
mega_pack_75_percent = mega_pack_75_num/mega_pack_denom
mega_pack_82_percent = mega_pack_82_num/mega_pack_denom
mega_pack_87_percent = mega_pack_87_num/mega_pack_denom


print(mega_pack_75_percent)
print(mega_pack_82_percent)
print(mega_pack_87_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'mega pack')]['Top Rating']);

## 8: Rare Players Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 95.00%
#### Gold 88+ : 5.00%

In [None]:
rare_players_pack_75 = data.loc[((data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 75))]
rare_players_pack_82 = data.loc[((data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 82))]
rare_players_pack_88 = data.loc[((data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 88))]

In [None]:
print(f'75+ Count : {len(rare_players_pack_75)}')
print(f'82+ Count : {len(rare_players_pack_82)}')
print(f'88+ Count : {len(rare_players_pack_88)}')

In [None]:
rare_players_pack_75_num = len(data[(data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 75)])
rare_players_pack_82_num = len(data[(data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 82)])
rare_players_pack_88_num = len(data[(data['Pack Type'] == 'rare players pack') & (data['Top Rating'] >= 88)])

print(rare_players_pack_75_num)
print(rare_players_pack_82_num)
print(rare_players_pack_88_num)

In [None]:
rare_players_pack_denom = len(data[(data['Pack Type'] == 'rare players pack')])
rare_players_pack_denom

In [None]:
rare_players_pack_75_percent = rare_players_pack_75_num/rare_players_pack_denom
rare_players_pack_82_percent = rare_players_pack_82_num/rare_players_pack_denom
rare_players_pack_88_percent = rare_players_pack_88_num/rare_players_pack_denom


print(rare_players_pack_75_percent)
print(rare_players_pack_82_percent)
print(rare_players_pack_88_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'rare players pack')]['Top Rating']);

## 9: Premium Electrum Players Pack

#### Odds:
#### Gold 75+ : 100.00%
#### Gold 82+ : 41.00%
#### Gold 85+ : 4.30%

In [None]:
premium_electrum_players_pack_75 = data.loc[((data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 75))]
premium_electrum_players_pack_82 = data.loc[((data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 82))]
premium_electrum_players_pack_85 = data.loc[((data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 85))]

In [None]:
print(f'75+ Count : {len(premium_electrum_players_pack_75)}')
print(f'82+ Count : {len(premium_electrum_players_pack_82)}')
print(f'85+ Count : {len(premium_electrum_players_pack_85)}')

In [None]:
premium_electrum_players_pack_75_num = len(data[(data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 75)])
premium_electrum_players_pack_82_num = len(data[(data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 82)])
premium_electrum_players_pack_85_num = len(data[(data['Pack Type'] == 'premium electrum players pack') & (data['Top Rating'] >= 85)])

print(premium_electrum_players_pack_75_num)
print(premium_electrum_players_pack_82_num)
print(premium_electrum_players_pack_85_num)

In [None]:
premium_electrum_players_pack_denom = len(data[(data['Pack Type'] == 'premium electrum players pack')])
premium_electrum_players_pack_denom

In [None]:
premium_electrum_players_pack_75_percent = premium_electrum_players_pack_75_num/premium_electrum_players_pack_denom
premium_electrum_players_pack_82_percent = premium_electrum_players_pack_82_num/premium_electrum_players_pack_denom
premium_electrum_players_pack_85_percent = premium_electrum_players_pack_85_num/premium_electrum_players_pack_denom


print(premium_electrum_players_pack_75_percent)
print(premium_electrum_players_pack_82_percent)
print(premium_electrum_players_pack_85_percent)

In [None]:
sns.boxplot(data[(data['Pack Type'] == 'premium electrum players pack')]['Top Rating']);

## Hypothesis

### 1) The probabilities of getting good players in FIFA 20 packs are lower than the pack probabilities posted by EA Sports simulation model.

### 2) EA Sports is not telling the truth when it comes to the true probability of getting good players in packs.

### 3) Could the true probability of getting good players in packs be significant enough to compare this case to DraftKings average DFS contest results? Assuming the true probability is lower than what EA Sports tells us.

## Binomial Test

In [None]:
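# A binomial test compares an observed sample proportion to a hypothesized proportion.

# binom_test(x, n=None, p=0.5, alternative='two-sided')
# x: number of successes
# n: total number of trials
# p: hypothesized probability of success on each trial
# alternative: the alternative hypothesis. Default is 'two-sided', but 'greater' or 'less' can also be specified.
# NOTE: scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in SciPy 1.12;
# on newer SciPy use scipy.stats.binomtest(k, n, p, alternative=...).pvalue instead.

# alternative = the direction I want to test for ('less' tests whether the true pack weight is lower than advertised)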

### 1) Gold Pack - Binomial Test

#### Fifa Odds:
##### Gold 75+ : 100.00%
##### Gold 82+ : 7.70%
##### Gold 84+ : 3.40%


#### My Odds:
##### Gold 75+ : 100.00%
##### Gold 82+ : 12.62%
##### Gold 84+ : 0.97%


#### My Count: n=103
##### 75+ Count : 103
##### 82+ Count : 13
##### 84+ Count : 1

In [None]:
# 82+
# h0: rating > 7.7%
# ha: rating <= 7.7%
# x = 20
# n=168
# p=7.7%

binom_test(x=13, n=103, p=.077, alternative='less')

In [None]:
# Because the p value (.053) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 7.7%

In [None]:
# 84+ 3.4
# h0: rating > 3.4%
# ha: rating <= 3.4%
# x = 4
# n=168
# p=3.4

binom_test(x=1, n=103, p=.034, alternative='less')

In [None]:
# Because the p value (.971) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 84+ pack rating is biased to =< 3.40%

### 2) Premium Gold Pack - Binomial Test

#### Fifa Odds:
##### Gold 75+ : 100.00%
##### Gold 82+ : 19.00%
##### Gold 84+ : 4.90% (listed as 4.70% in section 2 above; confirm which figure EA published)

#### My Odds:
##### Gold 75+ : 100.00%
##### Gold 82+ : 19.15%
##### Gold 84+ : 3.48%

#### My Count: n=632
##### Gold 75+ : 632
##### Gold 82+ : 121
##### Gold 84+ : 22

In [None]:
# 82+
# h0: get 82+ rating > 21.31% of the time
# ha: get 82+ rating <= 21.31% of the time
# x = 121
# n=632
# p=19%

binom_test(x=121, n=632, p=.19, alternative='less')

In [None]:
# Because the p value (.371) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 19%

In [None]:
# 84+
# h0: get 84+ rating > 4.9% of the time
# ha: get 84+ rating <= 4.9% of the time
# x = 22
# n=632
# p=4.7%

binom_test(x=22, n=632, p=.049, alternative='less')

In [None]:
# If I can find odds with p>= 5% It will work

In [None]:
# Because the p value (.158) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 84+ pack rating is biased to =< 4.7%

### 3) Premium Gold Players Pack

In [None]:
# 82+
# h0: get 82+ rating > 59% of the time
# ha: get 82+ rating <= 59% of the time
# x = 50
# n=94
# p=59%

binom_test(x=66, n=126, p=.59, alternative='less')

In [None]:
# Because the p value (.893) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 59%

In [None]:
# 86+
# h0: get 86+ rating > 4.7% of the time
# ha: get 86+ rating <= 4.7% of the time
# x = 5
# n=61
# p=4.7%

binom_test(x=6, n=126, p=.036, alternative='less')

In [None]:
# Because the p value (.123) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 86+ pack rating is biased to =< 3.6%

### 4) Jumbo Premium Gold Pack

In [None]:
# 82+
# h0: get 82+ rating > 42% of the time
# ha: get 82+ rating <= 42% of the time
# x = 22
# n=64
# p=42%

binom_test(x=33, n=93, p=.42, alternative='less')

In [None]:
# Because the p value (.914) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 42%

In [None]:
# 85+
# h0: get 85+ rating > 4.6% of the time
# ha: get 85+ rating <= 4.6% of the time
# x = 4
# n=64
# p=4.6%

binom_test(x=4, n=94, p=.046, alternative='less')

In [None]:
# Because the p value (.339) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 85+ pack rating is biased to =< 4.6%

### 5) Prime Gold Players Pack

In [None]:
# 82+
# h0: get 82+ rating > 80% of the time
# ha: get 82+ rating <= 80% of the time
# x = 36
# n=48
# p=80%

binom_test(x=36, n=48, p=.8, alternative='less')

In [None]:
# Because the p value (.852) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 80%

In [None]:
# 87+
# h0: get 87+ rating > 4.2% of the time
# ha: get 87+ rating <= 4.2% of the time
# x = 3
# n=48
# p=4.2%

binom_test(x=3, n=48, p=.042, alternative='less')

In [None]:
# Because the p value (.327) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 87+ pack rating is biased to =< 4.2%

### 6) Rare Gold Pack

In [None]:
# 82+
# h0: get 82+ rating > 55% of the time
# ha: get 82+ rating <= 55% of the time
# x = 21
# n=43
# p=55%

binom_test(x=21, n=43, p=.55, alternative='less')

In [None]:
# Because the p value (.832) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 55%

In [None]:
# 86+
# h0: get 86+ rating > 4.3% of the time
# ha: get 86+ rating <= 4.3% of the time
# x = 5
# n=43
# p=4.3%

binom_test(x=5, n=43, p=.043, alternative='less')

In [None]:
# Because the p value (.036) IS less than .05, we reject the null hypothesis.
# We have sufficient evidence to say the 87+ pack rating is biased to =< 4.2%

### 7) Mega Pack

In [None]:
# 82+
# h0: get 82+ rating > 79% of the time
# ha: get 82+ rating <= 79% of the time
# x = 81
# n=95
# p=79%

binom_test(x=100, n=118, p=.79, alternative='less')

In [None]:
# Because the p value (.080) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 79%

In [None]:
# 87+
# h0: get 87+ rating > 4.5% of the time
# ha: get 87+ rating <= 4.5% of the time
# x = 4
# n=95
# p=4.5%

binom_test(x=4, n=118, p=.045, alternative='less')

In [None]:
# Because the p value (.623) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 4.5%

### 8) Rare Players Pack

In [None]:
# 82+
# h0: get 82+ rating > 95% of the time
# ha: get 82+ rating <= 95% of the time
# x = 100
# n=103
# p=95%

binom_test(x=147, n=152, p=.95, alternative='less')

In [None]:
# Because the p value (.852) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 80%

In [None]:
# 88+
# h0: get 88+ rating > 5% of the time
# ha: get 88+ rating <= 5% of the time
# x = 11
# n=103
# p=5%

binom_test(x=14, n=152, p=.05, alternative='less')

In [None]:
# Because the p value (.852) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 80%

In [None]:
### 9) Premium Electrum Players Pack

In [None]:
# 82+
# h0: get 82+ rating > 41% of the time
# ha: get 82+ rating <= 41% of the time
# x = 18
# n=45
# p=41%

binom_test(x=18, n=45, p=.41, alternative='less')

In [None]:
# Because the p value (.852) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 80%

In [None]:
# 85+
# h0: get 85+ rating > 4.3% of the time
# ha: get 85+ rating <= 4.3% of the time
# x = 5
# n=45
# p=4.3%

binom_test(x=5, n=45, p=.043, alternative='less')

In [None]:
# Because the p value (.852) IS NOT less than .05, we fail to reject the null hypothesis.
# We do not have sufficient evidence to say the 82+ pack rating is biased to =< 80%

In [None]:
# https://www.statology.org/binomial-test-python/

# Jeff
# If your p-value is greater than 0.05, that means you fail to reject the null hypothesis and conclude that the actual probability is greater than or equal to what FIFA said
# If your p-value is less than 0.05, you reject the null hypothesis and conclude that the actual probability is less than what FIFA said

# Me
# Because the p value is greater than 0.05, we fail to reject the null hypothesis: the data do not give sufficient evidence that the actual pack probability is less than Fifa's stated pack probability. (Failing to reject does not prove that the actual probability is greater than or equal to the stated one.)
# Because the p value is less than 0.05, we reject the null hypothesis and conclude that the actual pack probability is less than Fifa's stated pack probability.

## Do pack by WO, board