
**Task 12: Introduction to Pandas (Series, DataFrame basics)**

In [2]:
import pandas as pd
import numpy as np

In [3]:
# 1. Build a Pandas Series from each of three sources: a Python list,
#    a NumPy array, and a dictionary. Each source is kept in its own
#    variable right next to the Series made from it.

# From a plain list: values get the default 0..n-1 integer index.
python_list = list(range(10, 60, 10))
series_from_list = pd.Series(python_list)

# From a NumPy array: same default index as the list case.
numpy_array = np.arange(1, 6)
series_from_array = pd.Series(numpy_array)

# From a dictionary: the keys become the index labels.
dictionary = dict(a=100, b=200, c=300)
series_from_dict = pd.Series(dictionary)


In [4]:
# 2. Give the Series explicit string labels instead of the default
#    integer index; one label per element of python_list.
index_labels = ['A', 'B', 'C', 'D', 'E']
custom_index_series = pd.Series(data=python_list, index=index_labels)
custom_index_series

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [20]:
# 3. Perform basic arithmetic operations on Series.
def render_labelled(label, series):
    """Return ``label`` on its own line followed by the Series' text form.

    Joining with a bare newline (rather than passing '\\n' as a separate
    ``print`` argument) avoids the space that ``print`` inserts between
    arguments, which otherwise pushes the first row out of alignment.

    Parameters:
        label: heading text printed above the Series.
        series: the pandas Series to render.

    Returns:
        A string of the form "<label>\\n<str(series)>".
    """
    return label + "\n" + str(series)


series1 = pd.Series([1, 2, 3, 4, 5])
series2 = pd.Series([10, 20, 30, 40, 50])

# Operators between two Series work element by element; both operands
# here carry the same default 0..4 index.
addition = series1 + series2
subtraction = series1 - series2
multiplication = series1 * series2
division = series1 / series2  # true division, so the result is float64

print(render_labelled("Addition:", addition))
print(render_labelled("Subtraction:", subtraction))
print(render_labelled("Multiplication:", multiplication))
print(render_labelled("Division:", division))

Addition: 
 0    11
1    22
2    33
3    44
4    55
dtype: int64
Subtraction: 
 0    -9
1   -18
2   -27
3   -36
4   -45
dtype: int64
Multiplication: 
 0     10
1     40
2     90
3    160
4    250
dtype: int64
Division: 
 0    0.1
1    0.1
2    0.1
3    0.1
4    0.1
dtype: float64


In [8]:
# 4. Fetch the same element two ways: by index label and by integer position.

# Label-based lookup: 'B' is one of the custom string labels.
element_by_label = custom_index_series.loc['B']
print("Element by Label:", element_by_label)

# Position-based lookup: positions are 0-based, so 1 is the second element.
element_by_position = custom_index_series.iloc[1]
print("Element by Position:", element_by_position)

Element by Label: 20
Element by Position: 20


In [9]:
# 5. Filter the Series to include only values greater than a specific threshold.
threshold = 2  # cut-off value; edit here to filter on a different bound

# `series1 > threshold` yields a boolean Series; indexing with it keeps
# only the elements where the comparison is True (original labels kept).
filtered_series = series1[series1 > threshold]

# sep="\n" puts the label on its own line so the first row of the Series
# lines up with the rows below it instead of trailing the label.
print("Filtered Series:", filtered_series, sep="\n")

Filtered Series: 2    3
3    4
4    5
dtype: int64


In [9]:
# 6. Build a DataFrame from a dictionary of equal-length lists:
#    every key becomes a column name and its list supplies the column values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank'],
    Age=[25, 30, 35, 28, 22, 45],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia'],
)
df_from_dict = pd.DataFrame(data)
df_from_dict

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago
3,David,28,Houston
4,Eve,22,Phoenix
5,Frank,45,Philadelphia


In [11]:
# 7. Create a DataFrame from a numpy array, specifying column and index names.

# Fixed seed so that re-running the notebook from a fresh kernel produces
# the same table instead of new random values each time.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# 4 rows x 3 columns of floats drawn uniformly from [0, 1).
array_data = rng.random((4, 3))

df_from_array = pd.DataFrame(
    array_data,
    columns=['Column1', 'Column2', 'Column3'],
    index=['Row1', 'Row2', 'Row3', 'Row4'],
)
df_from_array

Unnamed: 0,Column1,Column2,Column3
Row1,0.805342,0.862406,0.312102
Row2,0.158727,0.186378,0.780486
Row3,0.029546,0.043601,0.701072
Row4,0.859326,0.082606,0.436552


**DataFrame from CSV**

In [3]:
# Location of the COVID-19 CSV. This is an absolute path under
# /content/drive (looks like a Colab Google Drive mount — confirm);
# adjust it when running in a different environment.
COVID_CSV_PATH = "/content/drive/MyDrive/covid_19_clean_complete.csv"

# Load the whole file into a DataFrame and display it.
df = pd.read_csv(COVID_CSV_PATH)
df

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.939110,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.153300,20.168300,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.033900,1.659600,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.506300,1.521800,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.202700,17.873900,2020-01-22,0,0,0,0,Africa
...,...,...,...,...,...,...,...,...,...,...
49063,,Sao Tome and Principe,0.186400,6.613100,2020-07-27,865,14,734,117,Africa
49064,,Yemen,15.552727,48.516388,2020-07-27,1691,483,833,375,Eastern Mediterranean
49065,,Comoros,-11.645500,43.333300,2020-07-27,354,7,328,19,Africa
49066,,Tajikistan,38.861000,71.276100,2020-07-27,7235,60,6028,1147,Europe


**Display the first and last five rows of the DataFrame.**

In [4]:
# First five rows of the DataFrame (n=5 is also the default).
df.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


In [6]:
# Last five rows of the DataFrame (n=5 is also the default).
df.tail(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
49063,,Sao Tome and Principe,0.1864,6.6131,2020-07-27,865,14,734,117,Africa
49064,,Yemen,15.552727,48.516388,2020-07-27,1691,483,833,375,Eastern Mediterranean
49065,,Comoros,-11.6455,43.3333,2020-07-27,354,7,328,19,Africa
49066,,Tajikistan,38.861,71.2761,2020-07-27,7235,60,6028,1147,Europe
49067,,Lesotho,-29.61,28.2336,2020-07-27,505,12,128,365,Africa


**Summary of the DataFrame, including the mean, median (the 50% row), and standard deviation of the numeric columns.**

In [7]:
# Summary statistics for the numeric columns: count, mean, std, min,
# quartiles and max. The 50% row is the median. The percentiles are
# spelled out here; they are the same quartiles describe() uses by default.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Lat,Long,Confirmed,Deaths,Recovered,Active
count,49068.0,49068.0,49068.0,49068.0,49068.0,49068.0
mean,21.43373,23.528236,16884.9,884.17916,7915.713,8085.012
std,24.95032,70.44274,127300.2,6313.584411,54800.92,76258.9
min,-51.7963,-135.0,0.0,0.0,0.0,-14.0
25%,7.873054,-15.3101,4.0,0.0,0.0,0.0
50%,23.6345,21.7453,168.0,2.0,29.0,26.0
75%,41.20438,80.771797,1518.25,30.0,666.0,606.0
max,71.7069,178.065,4290259.0,148011.0,1846641.0,2816444.0


In [8]:
# Selecting a single column by name returns it as a Series.
column_name = 'Country/Region'
Countries = df[column_name]

print("\nSeries extracted from 'Country/Region' column:")
print(Countries)



Series extracted from 'Country/Region' column:
0                  Afghanistan
1                      Albania
2                      Algeria
3                      Andorra
4                       Angola
                 ...          
49063    Sao Tome and Principe
49064                    Yemen
49065                  Comoros
49066               Tajikistan
49067                  Lesotho
Name: Country/Region, Length: 49068, dtype: object


**Filter rows based on column values**

In [10]:
# Boolean mask: True for every row whose country is Albania.
is_albania = df['Country/Region'] == 'Albania'

# Keep only the rows where the mask is True; original row labels are preserved.
filtered_df = df.loc[is_albania]
filtered_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
262,,Albania,41.1533,20.1683,2020-01-23,0,0,0,0,Europe
523,,Albania,41.1533,20.1683,2020-01-24,0,0,0,0,Europe
784,,Albania,41.1533,20.1683,2020-01-25,0,0,0,0,Europe
1045,,Albania,41.1533,20.1683,2020-01-26,0,0,0,0,Europe
...,...,...,...,...,...,...,...,...,...,...
47764,,Albania,41.1533,20.1683,2020-07-23,4466,123,2523,1820,Europe
48025,,Albania,41.1533,20.1683,2020-07-24,4570,128,2608,1834,Europe
48286,,Albania,41.1533,20.1683,2020-07-25,4637,134,2637,1866,Europe
48547,,Albania,41.1533,20.1683,2020-07-26,4763,138,2682,1943,Europe


In [11]:
# Keep only the rows whose confirmed count is above the mean of the
# whole 'Confirmed' column.
# NOTE: this rebinds `filtered_df`, replacing the Albania-only frame
# assigned to that name in the previous filter cell.
confirmed = df['Confirmed']
average_confirmed = confirmed.mean()
filtered_df = df.loc[confirmed > average_confirmed]
filtered_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
3715,Hubei,China,30.975600,112.270700,2020-02-05,19665,549,633,18483,Western Pacific
3976,Hubei,China,30.975600,112.270700,2020-02-06,22112,618,817,20677,Western Pacific
4237,Hubei,China,30.975600,112.270700,2020-02-07,24953,699,1115,23139,Western Pacific
4498,Hubei,China,30.975600,112.270700,2020-02-08,27100,780,1439,24881,Western Pacific
4759,Hubei,China,30.975600,112.270700,2020-02-09,29631,871,1795,26965,Western Pacific
...,...,...,...,...,...,...,...,...,...,...
49020,,Ukraine,48.379400,31.165600,2020-07-27,67096,1636,37202,28258,Europe
49021,,United Arab Emirates,23.424076,53.847818,2020-07-27,59177,345,52510,6322,Eastern Mediterranean
49028,,United Kingdom,55.378100,-3.436000,2020-07-27,300111,45759,0,254352,Europe
49030,,US,40.000000,-100.000000,2020-07-27,4290259,148011,1325804,2816444,Americas


**Select rows based on multiple conditions.**

In [12]:
# Two conditions combined with & (element-wise AND): a row is kept only
# when it is an Albania row and its confirmed count is greater than 8.
albania_rows = df['Country/Region'] == 'Albania'
more_than_eight = df['Confirmed'] > 8
high_confirmed_cases = df[albania_rows & more_than_eight]
high_confirmed_cases

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
12529,,Albania,41.1533,20.1683,2020-03-10,10,0,0,10,Europe
12790,,Albania,41.1533,20.1683,2020-03-11,12,1,0,11,Europe
13051,,Albania,41.1533,20.1683,2020-03-12,23,1,0,22,Europe
13312,,Albania,41.1533,20.1683,2020-03-13,33,1,0,32,Europe
13573,,Albania,41.1533,20.1683,2020-03-14,38,1,0,37,Europe
...,...,...,...,...,...,...,...,...,...,...
47764,,Albania,41.1533,20.1683,2020-07-23,4466,123,2523,1820,Europe
48025,,Albania,41.1533,20.1683,2020-07-24,4570,128,2608,1834,Europe
48286,,Albania,41.1533,20.1683,2020-07-25,4637,134,2637,1866,Europe
48547,,Albania,41.1533,20.1683,2020-07-26,4763,138,2682,1943,Europe


In [13]:
# Rows with fewer than 10 confirmed cases and exactly one death.
# Each comparison yields a boolean Series; & keeps rows where both are True.
under_ten_confirmed = df['Confirmed'] < 10
exactly_one_death = df['Deaths'] == 1
low_confirmed_with_deaths = df[under_ten_confirmed & exactly_one_death]
low_confirmed_with_deaths


Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
318,Hebei,China,39.5490,116.1306,2020-01-23,1,1,0,0,Western Pacific
579,Hebei,China,39.5490,116.1306,2020-01-24,2,1,0,1,Western Pacific
580,Heilongjiang,China,47.8620,127.7615,2020-01-24,4,1,0,3,Western Pacific
840,Hebei,China,39.5490,116.1306,2020-01-25,8,1,0,7,Western Pacific
841,Heilongjiang,China,47.8620,127.7615,2020-01-25,9,1,0,8,Western Pacific
...,...,...,...,...,...,...,...,...,...,...
48009,British Virgin Islands,United Kingdom,18.4207,-64.6400,2020-07-23,8,1,7,0,Europe
48270,British Virgin Islands,United Kingdom,18.4207,-64.6400,2020-07-24,8,1,7,0,Europe
48531,British Virgin Islands,United Kingdom,18.4207,-64.6400,2020-07-25,8,1,7,0,Europe
48792,British Virgin Islands,United Kingdom,18.4207,-64.6400,2020-07-26,8,1,7,0,Europe


**Add a new column to the DataFrame**

In [14]:
# Add a derived 'TotalCases' column.
#
# In this dataset 'Confirmed' is already the cumulative case count:
# Active = Confirmed - Deaths - Recovered (e.g. the Yemen row shown by
# df.tail(): 1691 - 483 - 833 = 375). Adding Deaths and Recovered on top
# of Confirmed would count those people twice, so the total is built
# from the three non-overlapping outcome groups instead.
df['TotalCases'] = df['Active'] + df['Deaths'] + df['Recovered']
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region,TotalCases
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa,0


**Rename columns in the DataFrame**

In [15]:
# Replace the two slash-separated column names with shorter ones;
# columns not listed in the mapping keep their names.
# NOTE: once `df` is rebound here, earlier cells that look up
# 'Country/Region' on `df` need the CSV reloaded before they can be re-run.
new_names = {'Province/State': 'State', 'Country/Region': 'Country'}
df = df.rename(columns=new_names)
df.head()

Unnamed: 0,State,Country,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region,TotalCases
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa,0
