## 1. Import the Data

In [None]:
import pandas as pd
# Load the CSV file from the current working directory into a DataFrame.
data_frame = pd.read_csv("data.csv")

# Show the dimensions of the table as a (rows, columns) tuple.
data_frame.shape

In [None]:
# Get summary statistics for the loaded table (pandas' default describe() output).

data_frame.describe()

In [None]:
# Get a NumPy array with the table's values.
# to_numpy() is the accessor pandas recommends over the older `.values` attribute.

data_frame.to_numpy()

In [None]:
# Filter example: keep only the rows whose 'Age' is above 30 and preview them.

data_frame.loc[data_frame['Age'] > 30].head()

## 2. Cleaning the Data

In [None]:
# Build a working frame holding only the name, wage and value columns.
df1 = pd.DataFrame(
    data=data_frame,
    columns=["Name", "Wage", "Value"],
)

def currency_to_float(currency):
    """Convert an abbreviated amount such as "110.5M" or "565K" to a float.

    The last character is read as a magnitude suffix: K (thousand),
    M (million) or B (billion). A string that ends in a digit is treated
    as a plain number with no suffix.

    Args:
        currency: The amount as a string, with any currency symbol already
            removed.

    Returns:
        The numeric value as a float, or 0.0 when the input is not a string,
        is empty, carries an unknown suffix, or cannot be parsed.
    """
    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}

    # Missing cells reach this function as non-string values (e.g. NaN).
    if not isinstance(currency, str) or not currency:
        return 0.0

    suffix = currency[-1]
    try:
        if suffix in multipliers:
            return float(currency[:-1]) * multipliers[suffix]
        # No magnitude suffix: accept plain numbers such as "500" instead of
        # silently turning them into 0.0.
        if suffix.isdigit():
            return float(currency)
    except ValueError:
        # The numeric part is malformed; fall through to the 0.0 default.
        pass
    return 0.0

# Remove the € sign and any commas, then convert the K/M/B-suffixed strings
# to floats. The pattern is a raw string without a backslash: "\€" is not a
# valid string escape (Python warns about it) and "€" needs no escaping
# inside a regex character class.
df1["Wage"] = df1["Wage"].replace(r'[€,]', '', regex=True).apply(currency_to_float)
df1["Value"] = df1["Value"].replace(r'[€,]', '', regex=True).apply(currency_to_float)

# Can use "-" to subtract numeric columns
df1['difference'] = df1["Value"] - df1["Wage"]

# Show the rows sorted by difference, largest first (the result is displayed, not assigned back to df1)
df1.sort_values('difference', ascending=False)


### 2.1 Plot data

In [None]:
# Seaborn

import seaborn as sns

# Apply seaborn's default styling before plotting.
sns.set()

# Scatter plot of Value against Wage, one point per row of df1.
graph = sns.scatterplot(data=df1, x="Wage", y="Value")

In [72]:
# Bokeh
from bokeh.plotting import figure, show

# Hover tooltips as (label, value) pairs.
# "@Column" shows that column's value for the hovered point;
# "$index" shows the point's row index.
TOOLTIPS = [
    ("index", "$index"),
    ("('Wage', 'Value')", "(@Wage, @Value)"),
    ("Name", "@Name")
]

p = figure(
    title="Soccer 2019", 
    width=700,
    height=700,
    tooltips=TOOLTIPS
)

# scatter() draws circle markers by default. It replaces circle(..., size=...),
# which is deprecated in recent Bokeh releases.
p.scatter('Wage', 'Value', size=10, source=df1)

show(p)