In [17]:
import pandas as pd
import numpy as np

Task 1: Create a Pandas Series from a list, NumPy array, and dictionary

In [3]:
# Build a Series from three different containers.
# A list and a NumPy array carry no labels of their own; a dict supplies its keys as the index.
list_series = pd.Series(data=[10, 20, 30, 40, 50])
numpy_series = pd.Series(data=np.array([1, 2, 3, 4, 5]))
dict_series = pd.Series(data=dict(a=100, b=200, c=300))

In [4]:
# Show each Series under its own heading; the headings carry the blank-line spacing.
for heading, series in (
    ("Series from list:\n", list_series),
    ("\nSeries from numpy array:\n", numpy_series),
    ("\nSeries from dictionary:\n", dict_series),
):
    print(heading, series)

Series from list:
 0    10
1    20
2    30
3    40
4    50
dtype: int64

Series from numpy array:
 0    1
1    2
2    3
3    4
4    5
dtype: int64

Series from dictionary:
 a    100
b    200
c    300
dtype: int64


Task 2: Assign a custom index to the Series

In [5]:
# Attach explicit labels A-D to the values instead of relying on positional numbering.
custom_index_series = pd.Series(data=[5, 10, 15, 20], index=list("ABCD"))
print("\nSeries with custom index:\n", custom_index_series)


Series with custom index:
 A     5
B    10
C    15
D    20
dtype: int64


Task 3: Perform basic arithmetic operations on Series

In [6]:
# Two equally sized operands with matching default indexes.
series1 = pd.Series(data=[1, 2, 3, 4, 5])
series2 = pd.Series(data=[10, 20, 30, 40, 50])

# Element-wise arithmetic via the method forms of `+` and `*`.
sum_series = series1.add(series2)
product_series = series1.mul(series2)

print("\nSum of two series:\n", sum_series)
print("\nProduct of two series:\n", product_series)


Sum of two series:
 0    11
1    22
2    33
3    44
4    55
dtype: int64

Product of two series:
 0     10
1     40
2     90
3    160
4    250
dtype: int64


Task 4: Access elements using index labels and positions

In [9]:
# Label-based lookup goes through .loc; position-based scalar lookup goes through .iat.
print("\nAccess element by index label (custom index series 'B'):", custom_index_series.loc['B'])
print("\nAccess element by position (first element in list_series):", list_series.iat[0])


Access element by index label (custom index series 'B'): 10

Access element by position (first element in list_series): 10


Task 5: Filter Series to include only values greater than a specific threshold

In [10]:
# Build the boolean mask first, then keep only the entries where it is True.
above_threshold = list_series.gt(25)
filtered_series = list_series.loc[above_threshold]
print("\nFiltered Series (values > 25):\n", filtered_series)


Filtered Series (values > 25):
 2    30
3    40
4    50
dtype: int64


Task 6: Create a DataFrame from a dictionary of lists

In [15]:
# Column-oriented input: each key becomes a column, each list supplies that column's rows.
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    Salary=[50000, 60000, 70000, 80000],
)
df_dict = pd.DataFrame(data=data_dict)
print("\nDataFrame from dictionary:\n", df_dict)


DataFrame from dictionary:
       Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
3    David   40   80000


Task 7: Create a DataFrame from a NumPy array, specifying column and index names

In [16]:
# A 3x3 integer grid; the array itself has no labels, so both axes are named explicitly below.
array_data = np.array(
    [
        [10, 20, 30],
        [40, 50, 60],
        [70, 80, 90],
    ]
)
df_numpy = pd.DataFrame(
    data=array_data,
    index=['Row1', 'Row2', 'Row3'],
    columns=['A', 'B', 'C'],
)
print("\nDataFrame from NumPy array:\n", df_numpy)


DataFrame from NumPy array:
        A   B   C
Row1  10  20  30
Row2  40  50  60
Row3  70  80  90


Task 8: Load a DataFrame from a CSV file

In [19]:
# Load the home-price table; the path is relative to the notebook's working directory.
df_csv = pd.read_csv(filepath_or_buffer='homeprices_data.csv')

Task 9: Display the first and last five rows of the DataFrame

In [20]:
# Preview both ends of the loaded table; the row count of five is requested explicitly.
print("\nFirst five rows of DataFrame:\n", df_csv.head(5))
print("\nLast five rows of DataFrame:\n", df_csv.tail(5))


First five rows of DataFrame:
    area  bedrooms   price
0  1056         2   39.07
1  2600         4  120.00
2  1440         3   62.00
3  1521         3   75.00
4  1200         2   51.00

Last five rows of DataFrame:
     area  bedrooms  price
15  1175         2   42.0
16  1180         3   48.0
17  1540         3   60.0
18  2770         3  102.0
19   800         1   32.0


Task 10: Get summary statistics of the DataFrame

In [22]:
# Per-column descriptive statistics (count, mean, std, min, quartiles, max).
summary_stats = df_csv.describe()
print("\nSummary statistics:\n", summary_stats)


Summary statistics:
               area   bedrooms       price
count    20.000000  20.000000   20.000000
mean   1821.450000   2.900000   78.853500
std     864.615794   0.967906   43.761901
min     800.000000   1.000000   32.000000
25%    1173.750000   2.000000   41.500000
50%    1480.500000   3.000000   61.000000
75%    2633.000000   3.250000  106.500000
max    3700.000000   5.000000  167.000000


Task 11: Extract a specific column as a Series

In [23]:
# Selecting a single column with [] yields that column as a Series.
area_series = df_csv['area']
# The heading names the column that is actually extracted ('area').
print("\nExtracted 'area' column as Series:\n", area_series)


Extracted 'Age' column as Series:
 0     1056
1     2600
2     1440
3     1521
4     1200
5     1170
6     2732
7     3300
8     1310
9     3700
10    1800
11    2785
12    1000
13    1100
14    2250
15    1175
16    1180
17    1540
18    2770
19     800
Name: area, dtype: int64


Task 12: Filter rows based on column values

In [24]:
# Keep only the rows whose area exceeds 1200, using a named boolean mask.
large_area_mask = df_csv['area'].gt(1200)
filtered_df = df_csv.loc[large_area_mask]
print("\nFiltered rows where Area > 1200:\n", filtered_df)


Filtered rows where Area > 1200:
     area  bedrooms  price
1   2600         4  120.0
2   1440         3   62.0
3   1521         3   75.0
6   2732         4  135.0
7   3300         4  155.0
8   1310         3   50.0
9   3700         5  167.0
10  1800         3   82.0
11  2785         4  140.0
14  2250         3  101.0
17  1540         3   60.0
18  2770         3  102.0


Task 13: Select rows based on multiple conditions

In [28]:

# Keep rows that satisfy both conditions. Each comparison is parenthesised
# because `&` binds more tightly than `>`.
# NOTE: this rebinds `filtered_df`, replacing the single-condition result from Task 12.
filtered_df = df_csv[(df_csv['area'] > 1200) & (df_csv['price'] > 120)]
print("\nFiltered rows where Area > 1200 and Price > 120:\n", filtered_df)


Filtered rows where Area > 1200: and Price > 120
     area  bedrooms  price
6   2732         4  135.0
7   3300         4  155.0
9   3700         5  167.0
11  2785         4  140.0


Task 14: Add a new column to the DataFrame

In [32]:
# Derive a 'baths' column as one fewer than the bedroom count.
# NOTE(review): a 1-bedroom home gets 0 baths under this rule — confirm that is intended.
df_csv['baths'] = df_csv['bedrooms'] - 1
# The heading uses the real column name so it matches the printed table.
print("\nDataFrame after adding 'baths' column:\n", df_csv)


DataFrame after adding 'Bath Rooms' column:
     area  bedrooms   price  baths
0   1056         2   39.07      1
1   2600         4  120.00      3
2   1440         3   62.00      2
3   1521         3   75.00      2
4   1200         2   51.00      1
5   1170         2   38.00      1
6   2732         4  135.00      3
7   3300         4  155.00      3
8   1310         3   50.00      2
9   3700         5  167.00      4
10  1800         3   82.00      2
11  2785         4  140.00      3
12  1000         2   38.00      1
13  1100         2   40.00      1
14  2250         3  101.00      2
15  1175         2   42.00      1
16  1180         3   48.00      2
17  1540         3   60.00      2
18  2770         3  102.00      2
19   800         1   32.00      0


Task 15: Delete a column from the DataFrame

In [33]:
# Remove the derived 'baths' column again.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution finds no 'baths' column
# and leaves the frame unchanged rather than raising KeyError.
df_csv = df_csv.drop(columns=['baths'], errors='ignore')
print("\nDataFrame after deleting 'baths' column:\n", df_csv)


DataFrame after deleting 'Bath Rooms' column:
     area  bedrooms   price
0   1056         2   39.07
1   2600         4  120.00
2   1440         3   62.00
3   1521         3   75.00
4   1200         2   51.00
5   1170         2   38.00
6   2732         4  135.00
7   3300         4  155.00
8   1310         3   50.00
9   3700         5  167.00
10  1800         3   82.00
11  2785         4  140.00
12  1000         2   38.00
13  1100         2   40.00
14  2250         3  101.00
15  1175         2   42.00
16  1180         3   48.00
17  1540         3   60.00
18  2770         3  102.00
19   800         1   32.00


Task 16: Rename columns in the DataFrame

In [36]:
# Relabel the 'area' column as 'Total Area' on the existing frame; other columns keep their names.
df_csv.rename(mapper={'area': 'Total Area'}, axis='columns', inplace=True)
print("\nDataFrame after renaming columns:\n", df_csv)


DataFrame after renaming columns:
     Total Area  bedrooms   price
0         1056         2   39.07
1         2600         4  120.00
2         1440         3   62.00
3         1521         3   75.00
4         1200         2   51.00
5         1170         2   38.00
6         2732         4  135.00
7         3300         4  155.00
8         1310         3   50.00
9         3700         5  167.00
10        1800         3   82.00
11        2785         4  140.00
12        1000         2   38.00
13        1100         2   40.00
14        2250         3  101.00
15        1175         2   42.00
16        1180         3   48.00
17        1540         3   60.00
18        2770         3  102.00
19         800         1   32.00
