## Task 12: Introduction to Pandas (Series, DataFrame basics)
Submitted by: Awais Anwer

In [1]:
# Create a Pandas Series from a Python list, numpy array, and a dictionary.
# Assign a custom index to the Series.
# Perform basic arithmetic operations on Series.
# Access elements using index labels and positions.
# Filter the Series to include only values greater than a specific threshold.
# Create a DataFrame from a dictionary of lists.
# Create a DataFrame from a numpy array, specifying column and index names.
# Load a DataFrame from a CSV file.
# Display the first and last five rows of the DataFrame.
# Get a summary of the DataFrame including the mean, median, and standard deviation of numeric columns.
# Extract a specific column as a Series.
# Filter rows based on column values.
# Select rows based on multiple conditions.
# Add a new column to the DataFrame.
# Delete a column from the DataFrame.
# Rename columns in the DataFrame.

In [3]:
import pandas as pd
import numpy as np

In [10]:
# Create a Pandas Series from a Python list, numpy array, and a dictionary.
# python list
python_list = [1, 2, 3, 4, 5]
# numpy array
numpy_array = np.array([1, 2, 3, 4, 5])
# dictionary
python_dict = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

series_1 = pd.Series(python_list)
series_2 = pd.Series(numpy_array)
series_3 = pd.Series(python_dict)

print(series_1)
print(series_2)
print(series_3)

0    1
1    2
2    3
3    4
4    5
dtype: int64
0    1
1    2
2    3
3    4
4    5
dtype: int64
a    1
b    2
c    3
d    4
e    5
dtype: int64


In [11]:
# Assign a custom index to the Series.
series = pd.Series(python_list, index=['i', 'ii', 'iii', 'iv', 'v'])
print(series_1)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [13]:
# Perform basic arithmetic operations on Series.
added_series = series_1 + series_2

print("Added Series:")
print(added_series)

Added Series:
0     2
1     4
2     6
3     8
4    10
dtype: int64


In [15]:
# Access elements using index labels and positions.
print(series_3['c'])
print(series_3[2])

3
3


In [16]:
# Filter the Series to include only values greater than a specific threshold.
series_1 = series_1[series_1 > 3]
series_1

3    4
4    5
dtype: int64

In [17]:
# Create a DataFrame from a dictionary of lists.
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'City': ['New York', 'Los Angeles', 'Chicago']}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [18]:
# Create a DataFrame from a numpy array, specifying column and index names.
numpy_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
numpy_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [19]:
df = pd.DataFrame(numpy_array, columns=['A', 'B', 'C'], index=['Row1', 'Row2', 'Row3'])
df

Unnamed: 0,A,B,C
Row1,1,2,3
Row2,4,5,6
Row3,7,8,9


In [21]:
# Load a DataFrame from a CSV file.
df = pd.read_csv('https://raw.githubusercontent.com/mrdbourke/zero-to-mastery-ml/master/data/car-sales.csv')
df.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [22]:
# Display the first and last five rows of the DataFrame.
df.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [23]:
df.tail()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [25]:
# Get a summary of the DataFrame including the mean, median, and standard deviation of numeric columns.
df.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [27]:
# Extract a specific column as a Series.
df['Price']

0     $4,000.00
1     $5,000.00
2     $7,000.00
3    $22,000.00
4     $3,500.00
5     $4,500.00
6     $7,500.00
7     $7,000.00
8     $6,250.00
9     $9,700.00
Name: Price, dtype: object

In [28]:
# Filter rows based on column values.
df[df['Make'] == 'Toyota']

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [29]:
# Select rows based on multiple conditions.
df[(df['Make'] == 'Toyota') & (df['Odometer (KM)'] > 100000)]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"


In [31]:
# Add a new column to the DataFrame.
df['Seats'] = 4
df

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats
0,Toyota,White,150043,4,"$4,000.00",4
1,Honda,Red,87899,4,"$5,000.00",4
2,Toyota,Blue,32549,3,"$7,000.00",4
3,BMW,Black,11179,5,"$22,000.00",4
4,Nissan,White,213095,4,"$3,500.00",4
5,Toyota,Green,99213,4,"$4,500.00",4
6,Honda,Blue,45698,4,"$7,500.00",4
7,Honda,Blue,54738,4,"$7,000.00",4
8,Toyota,White,60000,4,"$6,250.00",4
9,Nissan,White,31600,4,"$9,700.00",4


In [32]:
# Delete a column from the DataFrame.
df = df.drop('Seats', axis=1)
df

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [33]:
# Rename columns in the DataFrame.
df = df.rename(columns={'Odometer (KM)': 'Odometer'})
df

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"
