In [None]:
# 1 Demonstrate three different methods for creating identical 2D arrays in NumPy. Provide the code for each method and the final output after each method 

import numpy as np

# Method 1: np.array from a nested list literal (default integer dtype).
a1 = np.array([[1, 2], [3, 4]])
print(a1)

# Method 2: np.arange + reshape.
# Multiplying by np.ones((2, 2)) promoted the result to float64 and printed
# [[1. 2.] [3. 4.]], so it was not identical to the other two arrays.
# np.arange keeps the default integer dtype, matching a1 and a3.
a2 = np.arange(1, 5).reshape(2, 2)
print(a2)

# Method 3: pre-allocate an integer array with np.zeros, then fill it row by row.
a3 = np.zeros((2, 2), dtype=int)
a3[0] = [1, 2]
a3[1] = [3, 4]
print(a3)


In [None]:
# 2 Using a NumPy function, generate an array of 100 evenly spaced numbers between 1 and 10, then reshape that 1D array into a 2D array
# 100 equally spaced samples from 1 to 10 (both ends included by default).
arr = np.linspace(start=1, stop=10, num=100)
# Any (rows, cols) pair whose product is 100 would work; use 20 rows of 5.
arr2d = np.reshape(arr, (20, 5))
print(arr2d)


In [None]:
# 3Explain the following terms 
# The difference in np.array, np.asarray and np.asanyarray 
# The difference between Deep copy and shallow copy 

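# np.array: Copies the input by default (copy=True), even when the input is already an ndarray.
# np.asarray: Converts input to an ndarray; does not copy if the input is already an ndarray of a compatible dtype.
# np.asanyarray: Like asarray, but passes ndarray subclasses (e.g., np.matrix, masked arrays) through unchanged instead of converting them to a base ndarray.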

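# Deep copy (arr.copy()): New array with its own data buffer; changes do NOT affect the original.
# Shallow copy / view (arr.view(), or a slice such as arr[::2]): New array object that shares the original's data buffer, so element changes in one are reflected in the other. (Plain assignment, b = a, copies only the reference: both names refer to the same array.)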

In [None]:
# 4 Generate a 3x3 array with random floating-point numbers between 5 and 20. Then, round each number in the array to 2 decimal places. 
# Draw a 3x3 grid of floats uniformly between 5 and 20.
arr = np.random.uniform(low=5, high=20, size=(3, 3))
# Keep two decimal places for every element.
rounded = arr.round(decimals=2)
print(rounded)


In [None]:
# 5Create a NumPy array with random integers between 1 and 10 of shape (5, 6). After creating the array perform the following operations: 
# a)Extract all even integers from array. 
# b)Extract all odd integers from array 

# randint's upper bound is exclusive, so high=11 yields integers in 1..10.
arr = np.random.randint(low=1, high=11, size=(5, 6))
# Boolean-mask indexing flattens the selection into a 1D array.
is_even = (arr % 2) == 0
evens = arr[is_even]
odds = arr[~is_even]
print("Evens:", evens)
print("Odds:", odds)


In [None]:
# 6 Create a 3D NumPy array of shape (3, 3, 3) containing random integers between 1 and 10. Perform the following operations: 
# a) Find the indices of the maximum values along each depth level (third axis). 
# b) Perform element-wise multiplication of between both array 

arr = np.random.randint(low=1, high=11, size=(3, 3, 3))
# Position of the largest value along the third axis; the result has shape (3, 3).
max_indices = arr.argmax(axis=2)
# Element-wise product of the array with itself, i.e. every entry squared.
elementwise_multiply = np.multiply(arr, arr)
print("Indices of max:", max_indices)
print("Squared array:", elementwise_multiply)


In [None]:
# 7 Clean and transform the 'Phone' column in the sample dataset to remove non-numeric characters and convert it to a numeric data type. Also display the table attributes and data types of each column 

import pandas as pd
import re

df = pd.read_csv('People-Data.csv')

# Strip every non-digit with the vectorized .str accessor instead of calling
# re.sub row by row inside apply. Missing phones become the text 'nan' after
# astype(str), which turns into an empty string once the letters are removed.
phone_digits = df['Phone'].astype(str).str.replace(r'\D', '', regex=True)

# Entries with no digits left are treated as missing. The nullable Int64
# dtype keeps the column integer-typed while still being able to hold <NA>.
df['Phone_clean'] = phone_digits.mask(phone_digits == '', pd.NA).astype('Int64')

# Table attributes (dimensions, column labels, index) followed by the
# per-column data types; previously only the dtypes were shown.
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())
print("Index:", df.index)
print(df.dtypes)


In [None]:
# 8 Perform the following tasks using people dataset: 
# a) Read the 'data.csv' file using pandas, skipping the first 50 rows. 
# b) Only read the columns: 'Last Name', ‘Gender’,’Email’,‘Phone’ and ‘Salary’ from the file. 
# c) Display the first 10 rows of the filtered dataset.
# d) Extract the ‘Salary’' column as a Series and display its last 5 values 

cols = ['Last Name', 'Gender', 'Email', 'Phone', 'Salary']

# An integer skiprows=50 skips the first 50 *lines* of the file, header
# included, so a data row would be parsed as the header and the column
# selection below would raise KeyError. Skipping lines 1..50 keeps line 0
# (the header) and drops the first 50 data rows instead.
# usecols restricts parsing to the requested columns at read time.
df = pd.read_csv('People-Data.csv', skiprows=range(1, 51), usecols=cols)

# usecols preserves the file's column order, so re-select to get the
# columns in the order listed in `cols`.
df = df[cols]

print(df.head(10))
# tail() returns the last 5 values by default.
print(df['Salary'].tail())


In [None]:
# 9 Filter and select rows from the People_Dataset, where the “Last Name' column contains the name 'Duke',  'Gender' column contains the word Female and ‘Salary’ should be less than 85000

# Build each row condition separately, then AND them together.
# na=False makes missing last names count as non-matches.
is_duke = df['Last Name'].str.contains('Duke', na=False)
is_female = df['Gender'].eq('Female')
below_cap = df['Salary'].lt(85000)
mask = is_duke & is_female & below_cap
filtered = df.loc[mask]
print(filtered)


In [None]:
# 10 Create a 7*5 Dataframe in Pandas using a series generated from 35 random integers between 1 to 6?

# 35 draws from 1..6 (the upper bound 7 is exclusive).
randints = np.random.randint(low=1, high=7, size=35)
# Lay the 35 values out as 7 rows by 5 columns.
grid = np.reshape(randints, (7, 5))
df = pd.DataFrame(data=grid)
print(df)


In [None]:
# 11 Create two different Series, each of length 50, with the following criteria: 
# a) The first Series should contain random numbers ranging from 10 to 50. 
# b) The second Series should contain random numbers ranging from 100 to 1000. 
# c) Create a DataFrame by joining these Series by column, and, change the names of the columns to 'col1', 'col2', etc 

# Upper bounds are exclusive, so 51 and 1001 make 50 and 1000 reachable.
s1 = pd.Series(np.random.randint(low=10, high=51, size=50))
s2 = pd.Series(np.random.randint(low=100, high=1001, size=50))
# Unnamed Series become columns 0 and 1 when concatenated side by side;
# map those positional labels to the requested names.
df = pd.concat([s1, s2], axis=1).rename(columns={0: 'col1', 1: 'col2'})
print(df)


In [None]:
# 12Perform the following operations using people data set: 
# a) Delete the 'Email', 'Phone', and 'Date of birth' columns from the dataset. 
# b) Delete the rows containing any missing values.
# d) Print the final output also 

# Load the file, drop the three unwanted columns, then discard every row
# that still contains at least one missing value.
df = (
    pd.read_csv('People-Data.csv')
    .drop(columns=['Email', 'Phone', 'Date of birth'])
    .dropna(axis=0, how='any')
)
print(df)


In [None]:
# 13 Create two NumPy arrays, x and y, each containing 100 random float values between 0 and 1. Perform the following tasks using Matplotlib and NumPy: 
# a) Create a scatter plot using x and y, setting the color of the points to red and the marker style to 'o'. 
# b) Add a horizontal line at y = 0.5 using a dashed line style and label it as 'y = 0.5'. 
# c) Add a vertical line at x = 0.5 using a dotted line style and label it as 'x = 0.5'. 
# d) Label the x-axis as 'X-axis' and the y-axis as 'Y-axis'. 
# e) Set the title of the plot as 'Advanced Scatter Plot of Random Values'. 
# f) Display a legend for the scatter plot, the horizontal line, and the vertical line.

import matplotlib.pyplot as plt

x = np.random.rand(100)
y = np.random.rand(100)

# Same figure as before, drawn through an explicit Axes handle.
fig, ax = plt.subplots()
ax.scatter(x, y, c='red', marker='o', label='Points')
# Reference lines through the midpoint of the unit square.
ax.axhline(y=0.5, color='black', linestyle='--', label='y = 0.5')
ax.axvline(x=0.5, color='blue', linestyle=':', label='x = 0.5')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_title('Advanced Scatter Plot of Random Values')
ax.legend()
plt.show()


In [None]:
# 14 Create a time-series dataset in a Pandas DataFrame with columns: 'Date', 'Temperature', 'Humidity' and Perform the following tasks using Matplotlib: 
# a) Plot the 'Temperature' and 'Humidity' on the same plot with different y-axes (left y-axis for 'Temperature' and right y-axis for 'Humidity'). 
# b) Label the x-axis as 'Date'. 
# c) Set the title of the plot as 'Temperature and Humidity Over Time'. 

# Synthetic 30-day series: one row per day with integer readings.
df = pd.DataFrame({
    'Date': pd.date_range('2025-01-01', periods=30),
    'Temperature': np.random.randint(20, 40, 30),
    'Humidity': np.random.randint(40, 80, 30)
})

fig, ax1 = plt.subplots()
# twinx() creates a second Axes sharing the x-axis, with its own y-axis on the right.
ax2 = ax1.twinx()

# plot() returns a list of lines; keep the single handle from each call so
# both series can be shown in one legend.
temp_line, = ax1.plot(df['Date'], df['Temperature'], 'r-', label='Temperature')
hum_line, = ax2.plot(df['Date'], df['Humidity'], 'b-', label='Humidity')

ax1.set_xlabel('Date')
ax1.set_ylabel('Temperature', color='r')
ax2.set_ylabel('Humidity', color='b')

# Set the title on an explicit Axes rather than relying on pyplot's
# "current axes" state (which is the twin Axes at this point).
ax1.set_title('Temperature and Humidity Over Time')

# The label= arguments were previously never displayed. The lines live on two
# different Axes, so pass both handles explicitly; the legend is attached to
# ax2 because the twin Axes is drawn on top of ax1.
ax2.legend(handles=[temp_line, hum_line], loc='upper left')

# Rotate and right-align the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.show()


In [None]:
# 15 Create a NumPy array data containing 1000 samples from a normal distribution. Perform the following tasks using Matplotlib: 
# a) Plot a histogram of the data with 30 bins. 
# b) Overlay a line plot representing the normal distribution's probability density function (PDF). 
# c) Label the x-axis as 'Value' and the y-axis as 'Frequency/Probability'. 
# d) Set the title of the plot as 'Histogram with PDF Overlay'.

from scipy.stats import norm

# 1000 draws from the standard normal distribution.
data = np.random.normal(loc=0, scale=1, size=1000)

fig, ax = plt.subplots()
# density=True normalizes the bars so they share a scale with the PDF curve.
ax.hist(data, bins=30, density=True, alpha=0.6, label='Histogram')

# Evaluate the PDF across the x-range the histogram ended up using.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, num=100)
pdf = norm.pdf(x, loc=0, scale=1)
ax.plot(x, pdf, 'k-', label='PDF')

ax.set_xlabel('Value')
ax.set_ylabel('Frequency/Probability')
ax.set_title('Histogram with PDF Overlay')
ax.legend()
plt.show()


In [None]:
# 16 Set the title of the plot as 'Histogram with PDF Overlay'. 

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# Load the people dataset.
df = pd.read_csv('People-Data.csv')

# Coerce Salary to numbers; unparseable entries become NaN and are dropped.
salaries = pd.to_numeric(df['Salary'], errors='coerce').dropna()

# Parameters of the normal curve fitted to the salary data.
mean = salaries.mean()
std = salaries.std()

# Density-normalized histogram on a 10x6 inch figure.
fig, ax = plt.subplots(figsize=(10, 6))
count, bins, ignored = ax.hist(
    salaries,
    bins=20,
    density=True,
    alpha=0.6,
    color='g',
    edgecolor='black',
)

# Normal PDF evaluated over the x-range the histogram occupies.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, num=100)
p = norm.pdf(x, loc=mean, scale=std)
ax.plot(x, p, 'k', linewidth=2)

# Title and axis labels.
ax.set_title('Histogram with PDF Overlay')
ax.set_xlabel('Salary')
ax.set_ylabel('Density')

fig.tight_layout()
plt.show()


In [None]:
# 17 Create a Seaborn scatter plot of two random arrays, color points based on their position relative to the origin (quadrants), add a legend, label the axes, and set the title as 'Quadrant-wise Scatter Plot'.
import seaborn as sns

x = np.random.randn(100)
y = np.random.randn(100)

# Classify every point in one vectorized pass instead of a Python loop.
# Conditions are checked in order; anything matching none of them (quadrant IV,
# plus any point lying exactly on an axis, as before) gets the default 'IV'.
labels = np.select(
    [(x > 0) & (y > 0), (x < 0) & (y > 0), (x < 0) & (y < 0)],
    ['I', 'II', 'III'],
    default='IV',
)

# hue_order pins the legend entries and colour assignment to I, II, III, IV.
# Without it the order follows whichever quadrant happens to appear first in
# the random data, so colours changed from run to run.
quadrant_order = ['I', 'II', 'III', 'IV']
ax = sns.scatterplot(x=x, y=y, hue=labels, hue_order=quadrant_order, palette='bright')

# Dashed axes through the origin make the quadrant boundaries visible.
ax.axhline(0, color='k', linestyle='--')
ax.axvline(0, color='k', linestyle='--')
ax.legend(title='Quadrant')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_title('Quadrant-wise Scatter Plot')
plt.show()


In [None]:
# 18 With Bokeh, plot a line chart of a sine wave function, add grid lines, label the axes, and set the title as 'Sine Wave Function' 
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np

# Render Bokeh output inline in the notebook.
output_notebook()

# Two full periods of the sine function sampled at 100 points.
x = np.linspace(0, 4 * np.pi, num=100)
y = np.sin(x)

# Axis labels are supplied up front through the figure constructor.
p = figure(
    title='Sine Wave Function',
    x_axis_label='x',
    y_axis_label='sin(x)',
)
p.line(x, y, line_width=2)

# Turn on grid lines for both dimensions.
p.xgrid.visible = True
p.ygrid.visible = True
show(p)





In [None]:
# 19 Using Bokeh, generate a bar chart of randomly generated categorical data, color bars based on their values, add hover tooltips to display exact values, label the axes, and set the title as 'Random Categorical Bar Chart' 
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool
import numpy as np

# Render Bokeh output inline in the notebook.
output_notebook()

cats = ['A', 'B', 'C', 'D', 'E']
values = np.random.randint(1, 20, 5)

# Colour encodes the value: the red channel is value * 10 (at most 190 for
# values below 20), while green and blue stay fixed at 50 and 120.
colors = []
for red in values * 10:
    colors.append('#{:02x}{:02x}{:02x}'.format(int(red), 50, 120))

source = ColumnDataSource(data=dict(cats=cats, vals=values, color=colors))

p = figure(x_range=cats, title='Random Categorical Bar Chart')
p.vbar(
    x='cats',
    top='vals',
    width=0.9,
    color='color',
    legend_field='cats',
    source=source,
)

# Hovering over a bar shows its exact value.
hover = HoverTool(tooltips=[('Value', '@vals')])
p.add_tools(hover)

p.xaxis.axis_label = 'Category'
p.yaxis.axis_label = 'Value'
p.grid.visible = True
show(p)



In [None]:
# 20Using Plotly, create a basic line plot of a randomly generated dataset, label the axes, and set the title as 'Simple Line Plot' 
import plotly.graph_objs as go
import numpy as np

# Random walk: running sum of 100 standard-normal steps.
y = np.cumsum(np.random.randn(100))

# With no x supplied, the trace is plotted against the sample index.
fig = go.Figure(data=[go.Scatter(y=y, mode='lines')])
fig.update_layout(
    title='Simple Line Plot',
    xaxis_title='Index',
    yaxis_title='Value',
)
fig.show()


In [None]:

# 21Using Plotly, create an interactive pie chart of randomly generated data, add labels and percentages, set the title as 'Interactive Pie Chart'.

import plotly.express as px
import numpy as np

# Five random slice sizes in 1..19, one per category label.
data = np.random.randint(low=1, high=20, size=5)
labels = list('ABCDE')

# hole=0.4 leaves the centre open (donut-style pie).
fig = px.pie(
    names=labels,
    values=data,
    title='Interactive Pie Chart',
    hole=0.4,
)
# Show the label and its percentage inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()