In [None]:
import pandas as pd

In [None]:
# Column-oriented toy records: one key per column, one list entry per person.
data = dict(
    Name=['John', 'Anna', 'Peter'],
    Age=[35, 28, 42],
    City=['NY', 'London', 'Delhi'],
)
print(data)

In [None]:
# Build a DataFrame from the `data` dict: each key becomes a column.
df = pd.DataFrame(data)
# Print the first two rows as plain text.
print(df.head(2))

In [None]:
# Summary statistics for the numeric columns of df.
df.describe()

In [None]:
# Passing a list of labels selects those columns and returns a DataFrame.
df[['Name', 'Age']]

In [None]:
# Add an 'Occupation' column; the list supplies one value per row, in row order.
df['Occupation'] = ['Engineer', 'Artist', 'Doctor']
df

In [None]:
# Keep only the rows whose Age is strictly greater than 30.
filter_df = df.loc[df['Age'].gt(30)]
filter_df

In [None]:
# Rows ordered by Age, smallest first; sort_values returns a new frame.
s_df = df.sort_values(by='Age', ascending=True)
s_df

**NumPy Section**

In [None]:
import numpy as np

In [None]:
# One-dimensional array holding five integers.
arr1 = np.array([1,2,3,4,5])
arr1

In [None]:
# Two-dimensional array built from nested lists: 2 rows by 3 columns.
arr2 = np.array([[1,2,3],[4,5,6]])
arr2

In [None]:
# Total of all elements in the 1-D array.
sum_arr1 = arr1.sum()
sum_arr1

In [None]:
# With no axis given, the sum runs over every element of the 2-D array
# and collapses to a single scalar.
sum_arr2 = arr2.sum()
sum_arr2

In [None]:
# axis=1 averages across the columns of each row, giving one mean per row.
mean_arr2 = arr2.mean(axis=1)
mean_arr2

In [None]:
# Scalar multiplication is applied element-wise to the array.
mul_arr1 = np.multiply(arr1, 2)
mul_arr1

In [None]:
# Two 2x2 operands for the matrix product.
arr3 = np.array([[1, 2], [3, 4]])
arr4 = np.array([[5, 6], [7, 8]])

# For 2-D arrays the @ operator computes the same matrix product as np.dot.
mat_mul = arr3 @ arr4
mat_mul

**Matplotlib**

In [None]:
from matplotlib import pyplot as plt

In [None]:
# 100 evenly spaced sample points covering [0, 10].
x = np.linspace(0,10,100)
x

In [None]:
# Evaluate sine and cosine at every sample point in x.
y1 = np.sin(x)
y2 = np.cos(x)

In [None]:
# Plot sine and cosine on one set of axes using the explicit figure/axes
# interface, so every call names the axes it draws on.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y1, label='Sin(x)', color='b', linestyle='--')
ax.plot(x, y2, label='Cos(x)', color='r', linestyle='-')
# The chart shows both curves, so the title names both (it previously
# mentioned only the sine function).
ax.set_title('Sine and Cosine Line Chart')
ax.set_xlabel('Time')
ax.set_ylabel('Mag')
ax.legend()
plt.show()

# Capstone Project Section

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Load the used-cars CSV into a DataFrame.
# Note: this rebinds `data`, which held a plain dict earlier in the notebook.
# The path is a hard-coded absolute location, so the cell only runs as-is
# where that file exists.
data = pd.read_csv("/kaggle/input/cars4u/used_cars_data.csv")

In [None]:
# Preview the first five rows.
data.head()

In [None]:
# (number of rows, number of columns).
data.shape

In [None]:
# Preview the last five rows.
data.tail()

In [None]:
# Column names, non-null counts and dtypes, plus memory usage.
data.info()

In [None]:
# Number of distinct values in each column (missing values are not counted).
data.nunique()

In [None]:
# Count of missing values per column.
data.isnull().sum()

In [None]:
# Missing values per column, expressed as a percentage of all rows.
data.isnull().sum()/len(data) * 100

In [None]:
# Drop the 'S.No.' column.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
data = data.drop(columns=['S.No.'], errors='ignore')
data.head()

In [None]:
from datetime import date

In [None]:
# Car age in years: the current calendar year minus the 'Year' column.
# NOTE(review): date.today() makes this column depend on when the notebook is
# run; consider a fixed reference year if results must be reproducible.
data['Age'] = date.today().year - data['Year']
data.head()

In [None]:
# Brand is taken to be the first whitespace-separated token of Name.
data['Brand'] = data['Name'].str.split().str[0]
data.head()

In [None]:
# Model is the second and third whitespace-separated tokens of Name.
# Slicing the token list and joining it (instead of concatenating get(1) and
# get(2)) keeps a usable value when Name has only two tokens; the earlier
# form produced NaN for the whole Model in that case. Name is also split
# only once now.
# A Name with a single token yields an empty string, which is converted to
# NaN so "no model" is represented as missing.
data['Model'] = (
    data['Name'].str.split()
    .str[1:3]
    .str.join(' ')
    .replace('', np.nan)
)
data.head()

In [None]:
# Show the original Name next to the derived Brand and Model columns.
data[['Name', 'Brand', 'Model']]

In [None]:
# Distinct values present in the Brand column.
data.Brand.unique()

In [None]:
# Brand tokens to inspect. They are joined with '|' so that str.contains
# treats them as regex alternatives and matches any Brand containing one of them.
searchfor = ['Isuzu', 'ISUZU', 'Land', 'Mini']
data[data.Brand.str.contains('|'.join(searchfor))].head()

In [None]:
# Normalise inconsistent or truncated brand labels.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on data['Brand']: that chained in-place form
# operates on an intermediate Series, emits a FutureWarning on recent pandas,
# and leaves `data` unchanged when Copy-on-Write is enabled.
data['Brand'] = data['Brand'].replace(
    {'ISUZU': 'Isuzu', 'Mini': 'Mini Cooper', 'Land': 'Land Rover'}
)
data.head()

In [None]:
# Summary statistics, transposed so each row describes one numeric column.
data.describe().T

In [None]:
# Labels of the columns stored with object dtype.
cat_cols = data.select_dtypes(include=['object']).columns
cat_cols

In [None]:
# Labels of the numeric columns, as a plain Python list.
num_cols = data.select_dtypes(include=np.number).columns.to_list()
num_cols

## Exploratory Data Analysis (EDA)

In [None]:
# For every numeric column: print its skewness, then draw a histogram and a
# box plot side by side.
for col in num_cols:
    skewness = round(data[col].skew(), 2)
    print(col, 'Skew:', skewness)

    # One row, two panels: histogram on the left, box plot on the right.
    fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(15, 4))
    data[col].hist(grid=False, ax=ax_hist)
    ax_hist.set_ylabel('Count')
    sns.boxplot(x=data[col], ax=ax_box)
    plt.show()

In [None]:
# Count plots for the categorical columns, one panel per column, bars ordered
# from most to least frequent.
fig, axes = plt.subplots(3, 2, figsize=(18, 18))
fig.suptitle('Bar plot for all categorical variables in the dataset')

# (column, max number of categories to draw or None for all, x tick-label rotation)
count_panels = [
    ('Fuel_Type', None, 0),
    ('Transmission', None, 0),
    ('Owner_Type', None, 0),
    ('Location', None, 45),
    ('Brand', 20, 90),
    ('Model', 20, 90),
]

# axes.flat walks the grid row by row, matching the panel order above.
for ax, (col, top_n, rotation) in zip(axes.flat, count_panels):
    # value_counts() sorts by frequency, so slicing its index AFTER counting
    # gives the top_n most common categories. The earlier
    # head(20).value_counts() only counted whatever appeared in the first 20
    # rows of the frame. A top_n of None keeps every category.
    order = data[col].value_counts().index[:top_n]
    sns.countplot(ax=ax, x=col, data=data, color='blue', order=order)
    if rotation:
        # Rotate only the category labels; the count axis stays horizontal.
        ax.tick_params(axis='x', labelrotation=rotation)

plt.show()

In [None]:
# Toy frame whose columns are the first five multiples of 1, 2, 3 and 4,
# so every pair of columns is perfectly linearly related.
dt = {
    label: [factor * step for step in range(1, 6)]
    for factor, label in enumerate('ABCD', start=1)
}
df = pd.DataFrame(dt)
df.head()

In [None]:
# Pairwise scatter plots of the toy frame, with kernel density estimates on the diagonal.
sns.pairplot(df, diag_kind="kde")

In [None]:
# Pairwise relationships between the numeric columns of the dataset.
# sns.pairplot is a figure-level function that creates its own figure, so a
# preceding plt.figure(figsize=...) call only produced an extra empty figure
# and had no effect on the grid size; it has been removed. Use pairplot's
# `height` / `aspect` arguments to control panel size.
sns.pairplot(data)
plt.show()

In [None]:
# Mean Price per category for four categorical columns, one bar chart per
# panel, bars sorted from highest to lowest mean.
fig, axarr = plt.subplots(2, 2, figsize=(10, 12))

# axarr.flat walks the grid row by row: top-left, top-right, bottom-left, bottom-right.
price_groupings = ['Location', 'Transmission', 'Fuel_Type', 'Owner_Type']
for ax, col in zip(axarr.flat, price_groupings):
    mean_price = data.groupby(col)['Price'].mean().sort_values(ascending=False)
    mean_price.plot.bar(ax=ax, fontsize=12)
    ax.set_title(f"{col} Vs Price", fontsize=18)

# Extra spacing between panels, then remove the top and right spines.
plt.subplots_adjust(hspace=1.0, wspace=.5)
sns.despine()