## Introduction to Pandas (Series, DataFrame basics)


In [2]:
import pandas as pd
import numpy as np

In [20]:
# 1. Build a Pandas Series from three containers: a Python list, a NumPy
#    array, and a dictionary.

# List input: no index is given, so the default 0..n-1 integer index is used.
python_list = [10, 20, 30, 40]
series_from_list = pd.Series(data=python_list)
print("\n Series from Python list:\n", series_from_list)

# NumPy input: the array holds the same four values as the list above.
numpy_array = np.array(python_list)
series_from_array = pd.Series(data=numpy_array)
print("\n Series from NumPy array:\n", series_from_array)

# Dictionary input: the keys ('a', 'b', 'c') become the index labels.
dict_data = dict(a=10, b=20, c=30)
series_from_dict = pd.Series(data=dict_data)
print("\n Series from dictionary:\n", series_from_dict)



 Series from Python list:
 0    10
1    20
2    30
3    40
dtype: int64

 Series from NumPy array:
 0    10
1    20
2    30
3    40
dtype: int32

 Series from dictionary:
 a    10
b    20
c    30
dtype: int64


In [6]:
# 2. Give the Series explicit labels instead of the default integer index.
# One label per element of python_list, in order.
custom_labels = ['x', 'y', 'z', 'w']
custom_index_series = pd.Series(data=python_list, index=custom_labels)
print("\n Series with custom index:\n", custom_index_series)



 Series with custom index:
 x    10
y    20
z    30
w    40
dtype: int64


In [22]:
# 3. Perform basic arithmetic operations on Series.
# Both operands carry the same default integer index (0..3), so the operators
# below combine the values label by label.
series_a = pd.Series([1, 2, 3, 4])
series_b = pd.Series([10, 20, 30, 40])

addition_result = series_a + series_b
print("\n Addition of two Series:\n", addition_result)

# The difference must be bound to the same name that the print below reads;
# otherwise the cell raises NameError when run on a fresh kernel.
subtraction_result = series_a - series_b
print("\n Subtraction of two Series:\n", subtraction_result)



 Addition of two Series:
 0    11
1    22
2    33
3    44
dtype: int64

 Subtraction of two Series:
 0    -9
1   -18
2   -27
3   -36
dtype: int64


In [16]:
# 4. Read a single element two ways: by its index label and by its
#    integer position.

# Label-based lookup: 'x' is the first label of the custom index.
element_by_label = custom_index_series.loc['x']
print("\n Element by label 'x':", element_by_label)

# Position-based lookup: position 0 is the first element regardless of labels.
first_position = 0
element_by_position = custom_index_series.iloc[first_position]
print("\n Element by position 0:", element_by_position)



 Element by label 'x': 10

 Element by position 0: 10


In [18]:
# 5. Keep only the values that exceed a threshold.
# The comparison yields a boolean Series; indexing with it keeps the entries
# where the mask is True.
above_threshold = custom_index_series > 20
filtered_series = custom_index_series[above_threshold]
print("\n Filtered Series with values greater than 20:\n", filtered_series)



 Filtered Series with values greater than 20:
 z    30
w    40
dtype: int64


In [48]:
# 6. Build a DataFrame from a dictionary of lists: each key is a column name
#    and each list holds that column's values (all lists have four entries).
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston'],
)

df_from_dict = pd.DataFrame(data=data_dict)
print("\n DataFrame from dictionary of lists:\n", df_from_dict)



 DataFrame from dictionary of lists:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston


In [46]:
# 7. Build a DataFrame from a 2-D NumPy array with explicit column and row
#    labels.

# The multiples of ten from 10 to 90, laid out row by row as a 3x3 grid.
numpy_array = np.arange(10, 100, 10).reshape(3, 3)

# One label per column and one per row of the grid.
columns = ['A', 'B', 'C']
index = ['Row1', 'Row2', 'Row3']

df_from_array = pd.DataFrame(data=numpy_array, index=index, columns=columns)
print("\n DataFrame from NumPy array with column and index names:\n", df_from_array)


 DataFrame from NumPy array with column and index names:
        A   B   C
Row1  10  20  30
Row2  40  50  60
Row3  70  80  90


In [28]:
# 8. Read a CSV file into a DataFrame.
# The path is relative, so it is resolved against the kernel's working
# directory.
csv_path = 'students.csv'
df = pd.read_csv(csv_path)
print("\n DataFrame loaded from CSV file:\n", df)


 DataFrame loaded from CSV file:
       Name  Age         City Grade
0    Alice   25     New York     A
1      Bob   30  Los Angeles     B
2  Charlie   35      Chicago     A
3    David   40      Houston     C
4      Eve   22        Miami     B


In [30]:
# 9. Preview both ends of the DataFrame, five rows each.
# When the frame has five rows or fewer, both previews show the whole table.
first_rows = df.head(5)
print("First five rows of DataFrame:\n", first_rows)

last_rows = df.tail(5)
print("Last five rows of DataFrame:\n", last_rows)

First five rows of DataFrame:
       Name  Age         City Grade
0    Alice   25     New York     A
1      Bob   30  Los Angeles     B
2  Charlie   35      Chicago     A
3    David   40      Houston     C
4      Eve   22        Miami     B
Last five rows of DataFrame:
       Name  Age         City Grade
0    Alice   25     New York     A
1      Bob   30  Los Angeles     B
2  Charlie   35      Chicago     A
3    David   40      Houston     C
4      Eve   22        Miami     B


In [32]:
# 10. Summarise the numeric columns: the describe() table first, then the
#     mean, median, and standard deviation on their own.

summary_table = df.describe()
print("\nSummary of DataFrame:\n", summary_table)

# numeric_only=True restricts each statistic to the numeric columns.
column_means = df.mean(numeric_only=True)
print("\nMean of numeric columns:\n", column_means)

column_medians = df.median(numeric_only=True)
print("\nMedian of numeric columns:\n", column_medians)

column_stds = df.std(numeric_only=True)
print("\nStandard deviation of numeric columns:\n", column_stds)



Summary of DataFrame:
              Age
count   5.000000
mean   30.400000
std     7.300685
min    22.000000
25%    25.000000
50%    30.000000
75%    35.000000
max    40.000000

Mean of numeric columns:
 Age    30.4
dtype: float64

Median of numeric columns:
 Age    30.0
dtype: float64

Standard deviation of numeric columns:
 Age    7.300685
dtype: float64


In [34]:
# 11. Pull one column out of the DataFrame; the result is a Series named
#     after the column.
age_series = df.loc[:, 'Age']
print("\nColumn 'Age' as a Series:\n", age_series)


Column 'Age' as a Series:
 0    25
1    30
2    35
3    40
4    22
Name: Age, dtype: int64


In [36]:
# 12. Filter rows based on column values.
filtered_df = df[df['Age'] > 25]
print("\nFiltered rows where Age > 25:\n", filtered_df)


Filtered rows where Age > 25:
       Name  Age         City Grade
1      Bob   30  Los Angeles     B
2  Charlie   35      Chicago     A
3    David   40      Houston     C


In [38]:
# 13. Select the rows that satisfy two conditions at once.
# Each condition is a boolean mask; `&` combines them element-wise, so a row
# is kept only when both masks are True for it.
age_over_25 = df['Age'] > 25
city_is_new_york = df['City'] == 'New York'

filtered_multiple_conditions_df = df[age_over_25 & city_is_new_york]
print("\nRows where Age > 25 and City is 'New York':\n", filtered_multiple_conditions_df)


Rows where Age > 25 and City is 'New York':
 Empty DataFrame
Columns: [Name, Age, City, Grade]
Index: []


In [40]:
# 14. Assign a 'Grade' column to the DataFrame from a plain list.
# NOTE(review): the CSV output recorded in this notebook already shows a
# 'Grade' column, so this assignment appears to replace its values rather
# than add a brand-new column -- confirm against students.csv.

num_rows = len(df)
print("\nNumber of rows in the DataFrame:", num_rows)

grades = [85, 90, 95, 80, 70]

# A list assigned as a column must have exactly one value per row, so check
# the lengths first and report a mismatch instead of attempting the assignment.
if len(grades) != num_rows:
    print(f"\nLength mismatch: DataFrame has {num_rows} rows, but 'grades' has {len(grades)} elements.")
else:
    df['Grade'] = grades
    print("\nDataFrame with new column 'Grade':\n", df)


Number of rows in the DataFrame: 5

DataFrame with new column 'Grade':
       Name  Age         City  Grade
0    Alice   25     New York     85
1      Bob   30  Los Angeles     90
2  Charlie   35      Chicago     95
3    David   40      Houston     80
4      Eve   22        Miami     70


In [42]:
# 15. Delete a column from the DataFrame.
# drop() returns a new frame, which is bound back to `df` rather than mutating
# the existing object with inplace=True. errors='ignore' keeps the cell safe
# to re-run: once 'Grade' is gone, running it again leaves the frame unchanged
# instead of raising KeyError.
df = df.drop(columns='Grade', errors='ignore')
print("\nDataFrame after deleting column 'Grade':\n", df)


DataFrame after deleting column 'Grade':
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4      Eve   22        Miami


In [44]:
# 16. Rename columns in the DataFrame.
# Maps each current column name (key) to its replacement (value); the rename
# is applied to `df` in place.
column_renames = {
    'Name': 'Student Name',
    'Age': 'Student Age',
    'City': 'Hometown',
}
df.rename(columns=column_renames, inplace=True)
print("\nDataFrame with renamed columns:\n", df)


DataFrame with renamed columns:
   Student Name  Student Age     Hometown
0        Alice           25     New York
1          Bob           30  Los Angeles
2      Charlie           35      Chicago
3        David           40      Houston
4          Eve           22        Miami
