In [1]:

# Step 1: Set Up the Environment
# Install required libraries (requests and pandas)
!pip install requests pandas -q


In [2]:
# 2. Import required libraries
import requests
import pandas as pd
from io import StringIO
from google.colab import files

In [3]:
print("🔄 Fetching COVID-19 data...")

covid_url = "https://disease.sh/v3/covid-19/countries"
covid_response = requests.get(covid_url)

if covid_response.status_code == 200:
    covid_data = covid_response.json()
    covid_df = pd.DataFrame(covid_data)
    print("✅ COVID-19 data loaded. Showing first 5 rows:")
    display(covid_df[['country', 'cases', 'deaths', 'recovered']].head())
else:
    print("❌ Failed to fetch COVID-19 data:", covid_response.status_code)

🔄 Fetching COVID-19 data...
✅ COVID-19 data loaded. Showing first 5 rows:


Unnamed: 0,country,cases,deaths,recovered
0,Afghanistan,234174,7996,211080
1,Albania,334863,3605,330233
2,Algeria,272010,6881,183061
3,Andorra,48015,165,0
4,Angola,107327,1937,103419


In [4]:
print("\n📁 Upload a CSV file to load it:")

uploaded = files.upload()

for filename in uploaded.keys():
    csv_df = pd.read_csv(StringIO(uploaded[filename].decode('utf-8')))
    print(f"✅ {filename} uploaded successfully. Showing first 5 rows:")
    display(csv_df.head())


📁 Upload a CSV file to load it:


In [5]:
!pip install pandas




In [6]:
import pandas as pd

url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)
print(df.head())

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


In [7]:
print(df.info())
print(df.describe())

# Check for missing values
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None
       PassengerId    Survived      Pclass         Age       SibSp  \
count   891.000000  891.000000  891.000000  714.000000  891.000000   
mean    446.000000    0.383838    2.308642   29.699118    0.523008   
std     257.353842    0.48659

In [8]:

# Drop columns with too many missing values or not useful
df = df.drop(columns=['Cabin'])

# Fill missing 'Age' values with the median
df['Age'].fillna(df['Age'].median(), inplace=True)

# Fill missing 'Embarked' values with the mode
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)


In [9]:
df = df.drop_duplicates()



# 4.3: Convert Categorical Variables to Numeric (optional)
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True)

In [10]:

# Step 5: Verify the Clean Data
print(df.isnull().sum())
print(df.head())
print(df.info())

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked_Q     0
Embarked_S     0
dtype: int64
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name  Sex   Age  SibSp  Parch  \
0                            Braund, Mr. Owen Harris    0  22.0      1      0   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...    1  38.0      1      0   
2                             Heikkinen, Miss. Laina    1  26.0      0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)    1  35.0      1      0   
4                           Allen, Mr. William Henry    0  35.0      0      0   

             Ticket     Fare  Embarked_Q  Embarked_S  
0         A/5 21171   7.

In [11]:
df.to_csv("titanic_cleaned.csv", index=False)
