# Import and Load Data

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Raw GitHub URL of the athletes CSV file used throughout this notebook.
url = 'https://raw.githubusercontent.com/jonathan-data-analysis/colab_test_data/main/athletes.csv'
df = pd.read_csv(url) # download the CSV at `url` and parse it into a DataFrame

## Let's Add Some Stats 💯
We can add summary statistics like count, mean, standard deviation, and more. The good news is that we won't have to create the logic from scratch. Instead, we'll use `describe()` to gather the summary statistics we need.

In [4]:
class Summary:
    """Convenience wrapper exposing quick summaries of a tabular dataset.

    Attributes:
        df: The pandas DataFrame all summary methods operate on.
    """

    def __init__(self, data):
        """Load the dataset to summarize.

        Args:
            data: Either an already-loaded ``pandas.DataFrame`` (stored as-is,
                without copying) or anything ``pandas.read_csv`` accepts,
                such as a file path, URL, or file-like object.
        """
        # Accepting a ready-made frame avoids re-reading (or re-downloading)
        # a CSV that the caller has already loaded.
        if isinstance(data, pd.DataFrame):
            self.df = data
        else:
            self.df = pd.read_csv(data)

    def print_head(self) -> pd.DataFrame:
        """Return the first rows of the dataset (``DataFrame.head()``)."""
        return self.df.head()

    def get_columns(self) -> list:
        """Return the column labels as a plain list."""
        return list(self.df.columns)

    def get_dim(self) -> None:
        """Print the number of rows and columns of the dataset."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self) -> pd.DataFrame:
        """Return the summary statistics produced by ``DataFrame.describe()``."""
        return self.df.describe()

In [5]:
# Summary reads the CSV at `url` itself (via pd.read_csv in __init__).
data = Summary(url)
# Last expression of the cell, so the describe() table is rendered as the cell output.
data.get_stats()

Unnamed: 0,rcc,wcc,hc,hg,ferr,bmi,ssf,pcBfat,lbm,ht,wt
count,202.0,202.0,202.0,202.0,202.0,202.0,202.0,202.0,202.0,202.0,202.0
mean,4.718614,7.108911,43.091584,14.566337,76.876238,22.955891,69.021782,13.507426,64.873713,180.10396,75.007921
std,0.457976,1.800337,3.662989,1.362451,47.501239,2.863933,32.565333,6.189826,13.070197,9.734494,13.925199
min,3.8,3.3,35.9,11.6,8.0,16.75,28.0,5.63,34.36,148.9,37.8
25%,4.3725,5.9,40.6,13.5,41.25,21.0825,43.85,8.545,54.6675,174.0,66.525
50%,4.755,6.85,43.5,14.7,65.5,22.72,58.6,11.65,63.035,179.7,74.4
75%,5.03,8.275,45.575,15.575,97.0,24.465,90.35,18.08,74.75,186.175,84.125
max,6.72,14.3,59.7,19.2,234.0,34.42,200.8,35.52,106.0,209.4,123.2


## More Summary Statistics
We can also add just the summary statistics we want, using `mean()`.

In [6]:
class Summary:
    """Convenience wrapper exposing quick summaries of a tabular dataset.

    Attributes:
        df: The pandas DataFrame all summary methods operate on.
    """

    def __init__(self, data):
        """Load the dataset to summarize.

        Args:
            data: Either an already-loaded ``pandas.DataFrame`` (stored as-is,
                without copying) or anything ``pandas.read_csv`` accepts,
                such as a file path, URL, or file-like object.
        """
        # Accepting a ready-made frame avoids re-reading (or re-downloading)
        # a CSV that the caller has already loaded.
        if isinstance(data, pd.DataFrame):
            self.df = data
        else:
            self.df = pd.read_csv(data)

    def print_head(self) -> pd.DataFrame:
        """Return the first rows of the dataset (``DataFrame.head()``)."""
        return self.df.head()

    def get_columns(self) -> list:
        """Return the column labels as a plain list."""
        return list(self.df.columns)

    def get_dim(self) -> None:
        """Print the number of rows and columns of the dataset."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self) -> pd.DataFrame:
        """Return the summary statistics produced by ``DataFrame.describe()``."""
        return self.df.describe()

    def get_mean(self, column) -> None:
        """Print the mean of one column.

        Args:
            column: Label of the column to average; it must exist in ``self.df``.
        """
        column_mean = self.df[column].mean()
        print(f"Mean {column}:", column_mean)


In [7]:
# Re-create the object so it is an instance of the Summary class redefined above.
data = Summary(url)
# get_mean prints its result instead of returning it.
data.get_mean('wt')

Mean wt: 75.0079207920792


In [8]:
class Summary:
    """Convenience wrapper exposing quick summaries of a tabular dataset.

    Attributes:
        df: The pandas DataFrame all summary methods operate on.
    """

    def __init__(self, data):
        """Load the dataset to summarize.

        Args:
            data: Either an already-loaded ``pandas.DataFrame`` (stored as-is,
                without copying) or anything ``pandas.read_csv`` accepts,
                such as a file path, URL, or file-like object.
        """
        # Accepting a ready-made frame avoids re-reading (or re-downloading)
        # a CSV that the caller has already loaded.
        if isinstance(data, pd.DataFrame):
            self.df = data
        else:
            self.df = pd.read_csv(data)

    def print_head(self) -> pd.DataFrame:
        """Return the first rows of the dataset (``DataFrame.head()``)."""
        return self.df.head()

    def get_columns(self) -> list:
        """Return the column labels as a plain list."""
        return list(self.df.columns)

    def get_dim(self) -> None:
        """Print the number of rows and columns of the dataset."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self) -> pd.DataFrame:
        """Return the summary statistics produced by ``DataFrame.describe()``."""
        return self.df.describe()

    def get_mean(self, column) -> None:
        """Print the mean of one column.

        Args:
            column: Label of the column to average; it must exist in ``self.df``.
        """
        column_mean = self.df[column].mean()
        print(f"Mean {column}:", column_mean)

    def get_standard_dev(self, column) -> None:
        """Print the standard deviation of one column, as computed by ``Series.std()``.

        Args:
            column: Label of the column to measure; it must exist in ``self.df``.
        """
        column_std = self.df[column].std()
        print(f"STD {column}:", column_std)

In [9]:
# Re-create the object so it is an instance of the Summary class redefined above.
data = Summary(url)
# get_standard_dev prints its result instead of returning it.
data.get_standard_dev('wt')

STD wt: 13.925199486183796
