In [1]:
# NUMPY + PANDAS — THE FOUNDATION OF DATA SCIENCE

In [2]:
# NumPy (Numerical Python)
# Why Use NumPy?
 # Faster than Python lists
 # Useful for mathematical operations
 # Basis for all ML/AI libraries (e.g., TensorFlow, PyTorch)



In [3]:
# Import and Array Creation

import numpy as np

# A flat list of numbers becomes a one-dimensional array.
one_d_values = [1, 2, 3, 4]
a = np.array(one_d_values)
print(a)


# A list of equal-length rows becomes a two-dimensional array (2 rows x 2 columns here).
two_d_rows = [[1, 2], [3, 4]]
b = np.array(two_d_rows)
print(b)



[1 2 3 4]
[[1 2]
 [3 4]]


In [4]:
# Array Operations

# Arithmetic operators on equal-length arrays work position by position,
# so no explicit loop over the elements is needed.
x = np.array([10, 20, 30])
y = np.array([1, 2, 3])

elementwise_results = (
    x + y,   # sums
    x * y,   # products
    x / y,   # true division, so the result is a float array
)
for result in elementwise_results:
    print(result)


[11 22 33]
[10 40 90]
[10. 10. 10.]


In [5]:
# NumPy Functions

# Each entry pairs the caption to print with the NumPy reducer that produces
# the value. np.std uses its default settings (population standard deviation).
summary_stats = (
    ("Mean of Array x is: ", np.mean),
    ("Max element of Array x is: ", np.max),
    ("Min element of Array x is: ", np.min),
    ("Standard deviation of Array x is: ", np.std),
)
for caption, reducer in summary_stats:
    print(caption, reducer(x))


Mean of Array x is:  20.0
Max element of Array x is:  30
Min element of Array x is:  10
Standard deviation of Array x is:  8.16496580927726


In [6]:
# Array Indexing and Slicing

arr = np.array([10, 20, 30, 40, 50])

# The stop index is exclusive: 1:4 picks positions 1, 2 and 3.
middle = arr[1:4]
print(middle)   # Output: [20 30 40]

# Leaving both bounds out selects the same elements as 0:len(arr) -- the whole array.
everything = arr[:]
print(everything)

[20 30 40]
[10 20 30 40 50]


In [7]:
# Pandas (Data Analysis Library)
# Why Use Pandas?
 # Easier to work with tables of data (like Excel/CSV)
 # Lets you load, filter, sort, group, analyze easily



In [8]:
# Import and Load Data

import pandas as pd

# Raw-file URL of the student dataset (adjacent string literals are joined
# into one string; the split is only to keep the line short).
url = (
    "https://raw.githubusercontent.com/TrishankSaharia/"
    "python-data-science-basics/main/datasets/student_data.csv"
)
df = pd.read_csv(url)   # read_csv accepts a URL as well as a local path
top_rows = df.head()    # first five rows by default
print(top_rows)


      Name  Age  Gender  Marks       City
0    Alice   22  Female     85      Delhi
1      Bob   25    Male     78     Mumbai
2  Charlie   24    Male     92  Bangalore
3    Diana   23  Female     88    Chennai
4    Ethan   26    Male     74    Kolkata


In [9]:
# Create DataFrame Manually

# Column-oriented input: each key is a column label and each list holds that
# column's values, one per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['Delhi', 'Mumbai', 'Bangalore'],
)

df = pd.DataFrame(data)
print(df)


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [10]:
# DataFrame Operations

# Column access: indexing with a column label returns that column as a Series.
ages = df['Age']
print(ages, "\n")

# Aggregate: arithmetic mean of the Age column.
print("Mean age is", ages.mean(), "\n")

# Row filter: a boolean Series used as the index keeps only the True rows.
older_than_28 = df[ages > 28]
print("Details of students with age greater than 28 \n", older_than_28, "\n")

# Sort: sort_values returns a new frame ordered by Age; df is not modified.
ordered_by_age = df.sort_values('Age')
print("Dataframe sorted by age is: \n", ordered_by_age)


0    25
1    30
2    35
Name: Age, dtype: int64 

Mean age is 30.0 

Details of students with age greater than 28 
       Name  Age       City
1      Bob   30     Mumbai
2  Charlie   35  Bangalore 

Dataframe sorted by age is: 
       Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [11]:
# Write the current frame to a CSV file; index=False keeps the row labels out of the file.
output_path = "output.csv"
df.to_csv(output_path, index=False)


In [12]:
# Task 1: Create a NumPy array with 10 elements

import numpy as np
# Ten evenly spaced integers: start at 10, step by 10, stop before 101 -> 10, 20, ..., 100.
x = np.arange(10, 101, 10)
print(x)


[ 10  20  30  40  50  60  70  80  90 100]


In [13]:
# Task 2: Find mean, median, max, min, and standard deviation

# Caption / NumPy reducer pairs, reported in this order.
reducers = (
    ("Mean of x is: ", np.mean),
    ("Median of x is: ", np.median),
    ("Max in x is: ", np.max),
    ("Min in x is: ", np.min),
    ("Standard deviation of x is: ", np.std),
)
for caption, reduce_fn in reducers:
    print(caption, reduce_fn(x))

Mean of x is:  55.0
Median of x is:  55.0
Max in x is:  100
Min in x is:  10
Standard deviation of x is:  28.722813232690143


In [14]:
# Task 3: Element-wise addition and multiplication with another array

y = np.array([100, 90, 80, 70, 60, 50, 40, 30, 20, 10])
print("Array x is: ", x, "\nArray y is: ", y, "\n")

# Arrays of the same length combine position by position.
elementwise_sum = x + y
elementwise_product = x * y
print("Element wise addition of arrays x and y is:\n", elementwise_sum, "\n")
print("Element wise multiplication of arrays x and y is:\n", elementwise_product)

Array x is:  [ 10  20  30  40  50  60  70  80  90 100] 
Array y is:  [100  90  80  70  60  50  40  30  20  10] 

Element wise addition of arrays x and y is:
 [110 110 110 110 110 110 110 110 110 110] 

Element wise multiplication of arrays x and y is:
 [1000 1800 2400 2800 3000 3000 2800 2400 1800 1000]


In [16]:
# Task 4: Slice elements from index n to m (both ends inclusive)

n=int(input("Enter lower index: "))
m=int(input("Enter upper index: "))

# A slice's stop is exclusive, so the inclusive upper bound becomes m + 1.
# For m == -1 (the last element) that would give a stop of 0, and x[n:0]
# is an empty slice, so the stop is left open in that one case.
stop = None if m == -1 else m + 1
print(f"Sliced Array from array x from index {n} to index {m} is: ",x[n:stop])


Enter lower index: 3
Enter upper index: 5
Sliced Array from array x from index 3 to index 5 is:  [40 50 60]


In [18]:
# Pandas Tasks (using my dataset)

import pandas as pd

# Raw-file URL of the student dataset (adjacent string literals are joined
# into one string; the split is only to keep the line short).
url = (
    "https://raw.githubusercontent.com/TrishankSaharia/"
    "python-data-science-basics/main/datasets/student_data.csv"
)
# Reading the CSV rebinds df to the student dataset used by the tasks below.
df = pd.read_csv(url)
top_rows = df.head()        # first five rows by default
print(top_rows, "\n")

# Task 5: Show the first n rows and the last m rows of my dataset

# Ask how many rows to show from each end of the dataset.
n = int(input("Enter the number of rows from the top: "))
m = int(input("Enter the number of rows from the bottom: "))

# head(n) takes rows from the start of the frame, tail(m) from the end.
leading_rows = df.head(n)
trailing_rows = df.tail(m)

print(f"\nFirst {n} rows in my dataset are: \n\n", leading_rows)
print(f"\nLast {m} rows in my dataset are: \n\n", trailing_rows)




      Name  Age  Gender  Marks       City
0    Alice   22  Female     85      Delhi
1      Bob   25    Male     78     Mumbai
2  Charlie   24    Male     92  Bangalore
3    Diana   23  Female     88    Chennai
4    Ethan   26    Male     74    Kolkata 

Enter the number of rows from the top: 3
Enter the number of rows from the bottom: 2

First 3 rows in my dataset are: 

       Name  Age  Gender  Marks       City
0    Alice   22  Female     85      Delhi
1      Bob   25    Male     78     Mumbai
2  Charlie   24    Male     92  Bangalore

Last 2 rows in my dataset are: 

     Name  Age  Gender  Marks     City
3  Diana   23  Female     88  Chennai
4  Ethan   26    Male     74  Kolkata


In [21]:
# Task 6: Filter rows where Marks > m

m = int(input("Enter marks above which you want the dataset entries: "))

# Boolean mask: True for rows whose Marks is strictly greater than the threshold.
above_threshold = df['Marks'] > m
filtered_df = df[above_threshold]
print("\n", filtered_df)


Enter marks above which you want the dataset entries: 87

       Name  Age  Gender  Marks       City
2  Charlie   24    Male     92  Bangalore
3    Diana   23  Female     88    Chennai


In [23]:
# Task 7: Sort by Age and Sort by marks

# sort_values returns a new frame in ascending order; df itself is left unchanged.
sorted_df1 = df.sort_values(by='Age')
# sep="\n" puts the caption on its own line so the column header lines up with the rows.
print("Data sorted by Age", sorted_df1, sep="\n")

sorted_df2 = df.sort_values(by='Marks')
# This frame is ordered by Marks, so the caption names Marks.
print("\nData sorted by Marks", sorted_df2, sep="\n")


Data sorted by Age       Name  Age  Gender  Marks       City
0    Alice   22  Female     85      Delhi
3    Diana   23  Female     88    Chennai
2  Charlie   24    Male     92  Bangalore
1      Bob   25    Male     78     Mumbai
4    Ethan   26    Male     74    Kolkata

Data sorted by Age       Name  Age  Gender  Marks       City
4    Ethan   26    Male     74    Kolkata
1      Bob   25    Male     78     Mumbai
0    Alice   22  Female     85      Delhi
3    Diana   23  Female     88    Chennai
2  Charlie   24    Male     92  Bangalore


In [24]:
# Task 8: Add a new column “Grade” based on Marks

def get_grade(marks):
    """Convert a single numeric mark into a letter grade.

    Bands (lower bound inclusive):
        marks >= 90 -> 'A+'
        marks >= 80 -> 'A'
        marks >= 70 -> 'B'
        otherwise   -> 'C'
    """
    # Ordered from the highest cut-off down so the first match wins.
    grade_bands = ((90, 'A+'), (80, 'A'), (70, 'B'))
    for cutoff, grade in grade_bands:
        if marks >= cutoff:
            return grade
    return 'C'

# Grade every student: call get_grade once for each entry of the 'Marks' column.

marks_column = df['Marks']                     # the Marks column as a Series
df['Grade'] = marks_column.apply(get_grade)    # element-wise call; result stored as a new 'Grade' column


# A bare expression on the last line makes the notebook render the updated frame.
df

Unnamed: 0,Name,Age,Gender,Marks,City,Grade
0,Alice,22,Female,85,Delhi,A
1,Bob,25,Male,78,Mumbai,B
2,Charlie,24,Male,92,Bangalore,A+
3,Diana,23,Female,88,Chennai,A
4,Ethan,26,Male,74,Kolkata,B


In [25]:
# Task 9: Export the updated dataframe to a new CSV

export_name = "final_processed_student_data.csv"
df.to_csv(export_name, index=False)   # index=False: do not write the row labels
print(f"Saved to {export_name}")

Saved to final_processed_student_data.csv


In [28]:
# Task 10: Showing column names and data types

# df.columns holds the column labels; df.dtypes gives the dtype stored for each column.
for schema_view in (df.columns, df.dtypes):
    print(schema_view)

Index(['Name', 'Age', 'Gender', 'Marks', 'City', 'Grade'], dtype='object')
Name      object
Age        int64
Gender    object
Marks      int64
City      object
Grade     object
dtype: object
