### Pandas Fundamentals: Practice

Description: This exercise is designed to practice data manipulation and analysis using Pandas.
You will work with the IPL matches dataset to complete tasks involving data access, cleaning, and basic preprocessing.

### Install and import pandas library

In [None]:
import pandas as pd

### Read the IPL matches CSV file and take a sneak peek at the data

In [None]:
# Load the matches CSV into a DataFrame; the path is relative to the working directory.
ipl_matches_df = pd.read_csv('ipl_matches.csv')

In [None]:
# Confirm what kind of object read_csv returned.
print(type(ipl_matches_df))

In [None]:
# Peek at the first row only.
ipl_matches_df.head(1)

In [None]:
# Peek at the last row only.
ipl_matches_df.tail(1)

In [None]:
ipl_matches_df.shape  # (number of rows, number of columns) as a tuple

In [None]:
ipl_matches_df.shape[0] # first tuple entry: number of rows

In [None]:
ipl_matches_df.shape[1] # second tuple entry: number of columns

In [None]:
print(ipl_matches_df.size) # total number of cells (rows * columns)

### Data Overview and Missing Values Analysis

In [None]:
# Concise summary of the frame: columns, non-null counts, dtypes and memory usage.
ipl_matches_df.info()

In [None]:
# Missing values per column: isna() flags missing cells, sum() adds the flags column-wise.
ipl_matches_df.isna().sum()

In [None]:
# The per-column counts come back as a pandas Series indexed by column name.
type(ipl_matches_df.isna().sum())

In [None]:
# Pick one column's count out of that Series via attribute access.
ipl_matches_df.isna().sum().umpire3

In [None]:
# Same idea starting from a single column: number of missing values in `city`.
ipl_matches_df.city.isna().sum()

### Data Cleaning and Transformation with Pandas 

In [None]:
import numpy as np

# Column vector holding 0..11: twelve rows, one column.
arr1 = np.reshape(np.arange(12), (12, 1))

In [None]:
# Display the array; it has 12 rows and 1 column after the reshape.
arr1

In [None]:
import numpy as np

# Toy 3x4 frame holding 0..11 row by row, with columns labelled A-D.
df = pd.DataFrame(
    data=np.arange(12).reshape(3, -1),
    columns=list('ABCD'),
)

In [None]:
# drop() with axis=1 removes columns and returns a NEW frame; df itself is unchanged here.
df.drop(['B', 'C'], axis=1)

In [None]:
# Keep the reduced frame under its own name; df still has all of its columns.
df_new = df.drop(['B', 'C'], axis=1)

In [None]:
# inplace=True mutates df directly and returns None, so B and C are gone from df afterwards.
# Re-running this cell raises KeyError because the columns no longer exist.
df.drop(['B', 'C'], axis=1, inplace=True)

In [None]:
# Look at the first three rows again before working on a single column.
ipl_matches_df.head(3)

In [None]:
# View the toss_decision column as a NumPy array.
ipl_matches_df.toss_decision.to_numpy()

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder that maps each distinct label to an integer code.
le = LabelEncoder()

In [None]:
# Learn the distinct toss_decision labels; fit() does not modify the DataFrame.
le.fit(ipl_matches_df.toss_decision)

In [None]:
# NOTE(review): left disabled, presumably so the column keeps its original labels - confirm.
# Uncommenting would overwrite toss_decision with the encoder's integer codes.
# ipl_matches_df.toss_decision = le.transform(ipl_matches_df.toss_decision)

In [None]:
# Labels learned by fit(), in sorted order; a label's position is its integer code.
le.classes_

### Data Description and Selection with Pandas

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); numeric columns only by default.
ipl_matches_df.describe()

In [None]:
# Keep the summary table so individual statistics can be looked up by row label.
ipl_stats = ipl_matches_df.describe()

In [None]:
# .loc selects by label: the 'max' row of the summary.
ipl_stats.loc['max']

In [None]:
# Label slices include BOTH endpoints: every row from 'min' through 'max'.
ipl_stats.loc['min':'max']

In [None]:
# Row whose index LABEL is 1 (the second row only if the index is the default 0..n-1).
ipl_matches_df.loc[1]

In [None]:
# Single cell: row label 1, column 'season'.
ipl_matches_df.loc[1, 'season']

In [None]:
# .iloc selects by integer POSITION: the second row, whatever its label.
ipl_matches_df.iloc[1]

In [None]:
# Positional slices exclude the end: rows at positions 1 and 2.
ipl_matches_df.iloc[1:3]

In [None]:
ipl_matches_df.iloc[1:3, 4] # [row, column]: rows at positions 1-2, column at position 4

In [None]:
ipl_matches_df.iloc[1:3, [4, 5, 10]] # [row, column]: a list picks several column positions at once

### Data Filtering, Counting and Sorting with Pandas

In [None]:
# Element-wise comparison yields a boolean Series: True where city equals 'Hyderabad'.
ipl_matches_df['city'] == 'Hyderabad'

In [None]:
# Store the boolean Series so it can be reused as a row filter.
mask = ipl_matches_df['city'] == 'Hyderabad'

In [None]:
# Boolean indexing keeps only the rows where the mask is True.
ipl_matches_df[mask]

In [None]:
# Same filter written inline, without the intermediate variable.
ipl_matches_df[ipl_matches_df['city'] == 'Hyderabad']

In [None]:
mask1 = ipl_matches_df['city'] == 'Hyderabad'
# Compared against the integer 2017 - assumes season is stored as a number; TODO confirm.
mask2 = ipl_matches_df['season'] == 2017
# & combines the masks element-wise, keeping rows where both conditions hold.
ipl_matches_df[mask1 & mask2]

In [None]:
# Rows per distinct winner, most frequent first; missing values are not counted by default.
ipl_matches_df['winner'].value_counts()

In [None]:
# Matches played per team = appearances as team1 + appearances as team2.
# A plain `+` aligns the two counts on team name and yields NaN for any team
# that appears in only one of the two columns; add(fill_value=0) treats the
# missing side as zero, and astype(int) keeps the totals as whole numbers.
total_matches = (
    ipl_matches_df['team1'].value_counts()
    .add(ipl_matches_df['team2'].value_counts(), fill_value=0)
    .astype(int)
)

In [None]:
# Ascending order is the default: smallest totals first. Returns a new Series.
total_matches.sort_values()

In [None]:
# Descending order: largest totals first.
total_matches.sort_values(ascending=False)

### Splitting Data for Training and Testing with Scikit-Learn

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Toy dataset: two numeric feature columns and a binary label, ten rows.
data = pd.DataFrame(
    {
        'Feature1': list(range(1, 11)),
        'Feature2': list(range(10, 101, 10)),
        'Label': [0, 1, 0, 1, 0, 1, 0, 1, 1, 1],
    }
)

X = data.loc[:, ['Feature1', 'Feature2']]  # feature matrix
y = data.loc[:, 'Label']                   # target vector

# Hold out 20% of the rows for testing. No random_state is passed, so the rows
# chosen differ from run to run; pass random_state=42 for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Print each piece of the split under its heading.
for _heading, _subset in (
    ("Training Features:\n", X_train),
    ("\nTesting Features:\n", X_test),
    ("\nTraining Labels:\n", y_train),
    ("\nTesting Labels:\n", y_test),
):
    print(_heading, _subset)
