In [1]:
import pandas as pd

### Q1. Create a Pandas Series that contains the following data: 4, 8, 15, 16, 23, and 42. Then, print the series.

In [3]:
# Values requested by the exercise, kept in a plain list first
data = [4, 8, 15, 16, 23, 42]

# Wrap the list in a Series; pandas assigns the default 0..n-1 integer index
s = pd.Series(data=data)

# Bare last expression so the notebook displays the Series
s

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

### Q2. Create a variable of list type containing 10 elements, apply the `pandas.Series` function to that variable, and print the result.

In [4]:
# Ten-element list: the integers 0 through 9
data = list(range(10))

# Convert the list into a Series with the default integer index
s2 = pd.Series(data=data)

# Bare last expression so the notebook displays the Series
s2

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

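### Q3. Create a Pandas DataFrame that contains the following data:

| Name   | Age | Gender |
|--------|-----|--------|
| Alice  | 25  | Female |
| Bob    | 30  | Male   |
| Claire | 27  | Female |

### Then, print the DataFrame.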

In [5]:
# Column-oriented records: each key is a column label, each list holds that column's values
data = dict(
    Name=['Alice', 'Bob', 'Claire'],
    Age=[25, 30, 27],
    Gender=['Female', 'Male', 'Female'],
)

# Build the DataFrame from the mapping; rows get the default 0..n-1 index
df1 = pd.DataFrame(data=data)

# Bare last expression so the notebook displays the DataFrame
df1

Unnamed: 0,Name,Age,Gender
0,Alice,25,Female
1,Bob,30,Male
2,Claire,27,Female


### Q4. What is a ‘DataFrame’ in pandas, and how is it different from a pandas.Series? Explain with an example.

In [6]:
def _report(title, obj):
    """Print `obj` under `title`, followed by its type, shape and number of dimensions."""
    print(title)
    print(obj)
    print(f"\nType: {type(obj)}")
    print(f"Shape: {obj.shape}")
    print(f"Dimensions: {obj.ndim}")


# A Series: a single labelled sequence of values
series_data = [1, 2, 3, 4, 5]
series_example = pd.Series(series_data)
_report("Pandas Series Example:", series_example)

# Visual separator between the two examples
print(f"\n{'=' * 50}\n")

# A DataFrame: several labelled columns that share one row index
df_data = dict(
    Column1=[1, 2, 3, 4, 5],
    Column2=['A', 'B', 'C', 'D', 'E'],
    Column3=[True, False, True, True, False],
)
df_example = pd.DataFrame(df_data)
_report("Pandas DataFrame Example:", df_example)

# Summarise how the two structures differ
print("\nKey Differences between Series and DataFrame:")
print("\n".join((
    "1. Series is 1-dimensional while DataFrame is 2-dimensional",
    "2. Series represents a single column while DataFrame can have multiple columns",
    "3. DataFrame can store different data types per column, Series typically contains same type",
    "4. DataFrame has labeled columns and rows, Series has only index labels",
)))


Pandas Series Example:
0    1
1    2
2    3
3    4
4    5
dtype: int64

Type: <class 'pandas.core.series.Series'>
Shape: (5,)
Dimensions: 1


Pandas DataFrame Example:
   Column1 Column2  Column3
0        1       A     True
1        2       B    False
2        3       C     True
3        4       D     True
4        5       E    False

Type: <class 'pandas.core.frame.DataFrame'>
Shape: (5, 3)
Dimensions: 2

Key Differences between Series and DataFrame:
1. Series is 1-dimensional while DataFrame is 2-dimensional
2. Series represents a single column while DataFrame can have multiple columns
3. DataFrame can store different data types per column, Series typically contains same type
4. DataFrame has labeled columns and rows, Series has only index labels


### Q5. What are some common functions you can use to manipulate data in a Pandas DataFrame? Can you give an example of when you might use one of these functions?

In [7]:
# Build a small frame with exactly one missing value in each column so that
# every operation below has something visible to act on.
df = pd.DataFrame({
    'Name': ['John', 'Emma', 'Mike', None, 'Sarah'],
    'Age': [25, 30, None, 35, 28],
    'Salary': [50000, 60000, 45000, 70000, None]
})

print("Original DataFrame:")
print(df)
print("\n")

# 1. fillna() - Fill missing values
# The same placeholder is written into every column, so the numeric
# columns Age and Salary end up holding a string where data was missing.
df_filled = df.fillna("Missing")
print("After fillna():")
print(df_filled)
print("\n")

# 2. dropna() - Remove rows with missing values
# With default arguments a row is dropped if ANY of its columns is missing.
df_dropped = df.dropna()
print("After dropna():")
print(df_dropped)
print("\n")

# 3. sort_values() - Sort by a column
# Rows whose sort key is missing are placed last by default.
df_sorted = df.sort_values('Age', ascending=False)
print("After sort_values():")
print(df_sorted)
print("\n")

# 4. groupby() - Group and aggregate data
# Every Age in this sample is unique, so each group holds a single row;
# the row with a missing Age is left out of the groups by default.
df_grouped = df.groupby('Age')['Salary'].mean()
print("After groupby() and mean():")
print(df_grouped)
print("\n")

# 5. apply() - Apply custom function to data
# No explicit null check is needed: NaN / 1000 is still NaN, so the
# missing salary stays missing. This adds the column to `df` in place.
df['Salary_K'] = df['Salary'].apply(lambda salary: salary / 1000)
print("After apply():")
print(df)

Original DataFrame:
    Name   Age   Salary
0   John  25.0  50000.0
1   Emma  30.0  60000.0
2   Mike   NaN  45000.0
3   None  35.0  70000.0
4  Sarah  28.0      NaN


After fillna():
      Name      Age   Salary
0     John     25.0  50000.0
1     Emma     30.0  60000.0
2     Mike  Missing  45000.0
3  Missing     35.0  70000.0
4    Sarah     28.0  Missing


After dropna():
   Name   Age   Salary
0  John  25.0  50000.0
1  Emma  30.0  60000.0


After sort_values():
    Name   Age   Salary
3   None  35.0  70000.0
1   Emma  30.0  60000.0
4  Sarah  28.0      NaN
0   John  25.0  50000.0
2   Mike   NaN  45000.0


After groupby() and mean():
Age
25.0    50000.0
28.0        NaN
30.0    60000.0
35.0    70000.0
Name: Salary, dtype: float64


After apply():
    Name   Age   Salary  Salary_K
0   John  25.0  50000.0      50.0
1   Emma  30.0  60000.0      60.0
2   Mike   NaN  45000.0      45.0
3   None  35.0  70000.0      70.0
4  Sarah  28.0      NaN       NaN


### Q6. Which of the following are mutable in nature: Series, DataFrame, Panel?

In [8]:
# Create a Series
s = pd.Series([1, 2, 3, 4, 5])
print("Original Series:")
print(s)

# Demonstrate mutability by overwriting the value stored under index label 0.
# `.loc` makes the label-based assignment explicit, matching the DataFrame
# example below.
s.loc[0] = 10
print("\nAfter modifying Series:")
print(s)

# Create a DataFrame
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6]
})
print("\nOriginal DataFrame:")
print(df)

# Demonstrate mutability by overwriting a single cell (row label 0, column 'A')
df.loc[0, 'A'] = 100
print("\nAfter modifying DataFrame:")
print(df)

# Note: Panel cannot be demonstrated on current pandas; it was deprecated in
# 0.20.0 and removed entirely in 0.25.0.
print("\nNote: Panel was deprecated in pandas 0.20.0 and removed in version 0.25.0")
print("Both Series and DataFrame are mutable in nature.")


Original Series:
0    1
1    2
2    3
3    4
4    5
dtype: int64

After modifying Series:
0    10
1     2
2     3
3     4
4     5
dtype: int64

Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6

After modifying DataFrame:
     A  B
0  100  4
1    2  5
2    3  6

Note: Panel was deprecated in pandas 0.20.0 and removed in version 0.25.0
Both Series and DataFrame are mutable in nature.


### Q7. Create a DataFrame using multiple Series. Explain with an example.

In [17]:
# Two independent Series that will become the columns of one DataFrame.
# s2 is one element shorter than s1, so the last row has no Name and is
# shown as a missing value in the result.
s1 = pd.Series(data=[1, 2, 3, 4, 5])
s2 = pd.Series(data=['Anuj', 'Ashish', 'Adarsh', 'Alok'])

# Each keyword becomes a column label; rows are matched up by index label.
pd.DataFrame(dict(Id=s1, Name=s2))

Unnamed: 0,Id,Name
0,1,Anuj
1,2,Ashish
2,3,Adarsh
3,4,Alok
4,5,


In [13]:
# Create individual Series; each one carries a `name` that is used below
# as its column label.
age = pd.Series([25, 30, 35], name='Age')
name = pd.Series(['John', 'Alice', 'Bob'], name='Name')
city = pd.Series(['New York', 'London', 'Paris'], name='City')

# Create DataFrame from multiple Series.
# The dict is keyed by each Series' own `name`, so the column labels really
# do come from the Series. With a hand-written dict literal the labels would
# come from the dict keys and `Series.name` would be ignored.
df = pd.DataFrame({series.name: series for series in (name, age, city)})

print("DataFrame created from multiple Series:")
print(df)

# Explanation:
print("\nExplanation:")
print("1. We created 3 separate Series with names: age, name and city")
print("2. Each Series has the same length but different data types")
print("3. We combined them into a DataFrame using pd.DataFrame()")
print("4. The Series names became column names in the DataFrame")


DataFrame created from multiple Series:
    Name  Age      City
0   John   25  New York
1  Alice   30    London
2    Bob   35     Paris

Explanation:
1. We created 3 separate Series with names: age, name and city
2. Each Series has the same length but different data types
3. We combined them into a DataFrame using pd.DataFrame()
4. The Series names became column names in the DataFrame
