# Import and Load Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Location of the athletes CSV (raw file hosted on GitHub)
url = 'https://raw.githubusercontent.com/jonathan-data-analysis/colab_test_data/main/athletes.csv'

# Fetch and parse the CSV at `url` into a pandas DataFrame
df = pd.read_csv(url)
# Bare last expression: rendered as the cell's output
df

Unnamed: 0,rcc,wcc,hc,hg,ferr,bmi,ssf,pcBfat,lbm,ht,wt,sex,sport
0,3.96,7.5,37.5,12.3,60,20.56,109.1,19.75,63.32,195.9,78.9,f,B_Ball
1,4.41,8.3,38.2,12.7,68,20.67,102.8,21.30,58.55,189.7,74.4,f,B_Ball
2,4.14,5.0,36.4,11.6,21,21.86,104.6,19.88,55.36,177.8,69.1,f,B_Ball
3,4.11,5.3,37.3,12.6,69,21.88,126.4,23.66,57.18,185.0,74.9,f,B_Ball
4,4.45,6.8,41.5,14.0,29,18.96,80.3,17.64,53.20,184.6,64.6,f,B_Ball
...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,4.90,7.6,45.6,16.0,90,27.56,67.2,11.79,82.00,183.9,93.2,m,W_Polo
198,5.66,8.3,50.2,17.7,38,23.76,56.5,10.05,72.00,183.5,80.0,m,Tennis
199,5.03,6.4,42.7,14.3,122,22.01,47.6,8.51,68.00,183.1,73.8,m,Tennis
200,4.97,8.8,43.0,14.9,233,22.34,60.4,11.50,63.00,178.4,71.1,m,Tennis


## Adding Summary Methods for EDA
One great use of OOP is to create a class that can be reused with any dataset. A custom class can help you perform EDA (Exploratory Data Analysis), a common first step before working on a dataset, a particular problem you're trying to solve, or a question you're trying to answer. Let's take a closer look, starting with the column header names.

In [4]:
class Summary:
    """Small EDA helper that wraps a pandas DataFrame."""

    def __init__(self, data):
        """Store the dataset to summarise on ``self.df``.

        Parameters
        ----------
        data : pandas.DataFrame, or anything ``pd.read_csv`` accepts
            An already-loaded DataFrame is stored as-is (no copy), which
            avoids re-reading or re-downloading data that is already in
            memory. Any other value (local path, URL, open file object)
            is handed to ``pd.read_csv`` exactly as before.
        """
        if isinstance(data, pd.DataFrame):
            self.df = data
        else:
            self.df = pd.read_csv(data)

    def print_head(self, n=5):
        """Return the first ``n`` rows (default 5) as a DataFrame.

        Despite the name, nothing is printed: the frame is returned so the
        notebook can render it as rich cell output.
        """
        return self.df.head(n)

    def get_columns(self):
        """Return the column labels as a plain Python list."""
        return list(self.df.columns)

In [5]:
# Build a Summary from the CSV at `url`, then list its column names
data = Summary(url)
data.get_columns()

['rcc',
 'wcc',
 'hc',
 'hg',
 'ferr',
 'bmi',
 'ssf',
 'pcBfat',
 'lbm',
 'ht',
 'wt',
 'sex',
 'sport']

## How About Dimensions?
Let's add the row and column information to the custom class!

In [6]:
class Summary:
    """Small EDA helper that wraps a pandas DataFrame.

    Redefines ``Summary`` with an extra ``get_dim`` method for reporting
    the dataset's dimensions.
    """

    def __init__(self, data):
        """Store the dataset to summarise on ``self.df``.

        Parameters
        ----------
        data : pandas.DataFrame, or anything ``pd.read_csv`` accepts
            An already-loaded DataFrame is stored as-is (no copy), which
            avoids re-reading or re-downloading data that is already in
            memory. Any other value (local path, URL, open file object)
            is handed to ``pd.read_csv`` exactly as before.
        """
        if isinstance(data, pd.DataFrame):
            self.df = data
        else:
            self.df = pd.read_csv(data)

    def print_head(self, n=5):
        """Return the first ``n`` rows (default 5) as a DataFrame.

        Despite the name, nothing is printed: the frame is returned so the
        notebook can render it as rich cell output.
        """
        return self.df.head(n)

    def get_columns(self):
        """Return the column labels as a plain Python list."""
        return list(self.df.columns)

    def get_dim(self):
        """Print the number of rows and columns; returns ``None``."""
        # DataFrame.shape is (n_rows, n_columns), the same values as
        # len(df) and len(df.columns).
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

In [8]:
# Re-create the Summary with the redefined class so `get_dim` is available,
# then print the row and column counts
data = Summary(url)
data.get_dim()

Rows: 202
Columns: 13
