# Steps for starting

1- Download environment from OneDrive

2- Open Anaconda prompt and Anaconda Navigator

3- Copy the environment file (environment.yml) and paste it into Downloads


4- In Anaconda prompt write these commands:

 cd "C:\Users\3120889D"

 conda env create -f "C:\Users\3120889D\Downloads\environment.yml"

 conda activate condavenv


5- Meanwhile, open VS Code and roll the Jupyter extension back to a version released about 5 months ago

6- In Anaconda Navigator, change the environment to condavenv and launch VS Code

7- Create a new folder in Downloads called exam

8- Open the folder from VS Code

9- Create a Python script exam.py and make sure it uses the condavenv interpreter

10- Create a Jupyter Notebook and run a cell

# General

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import torch
import torch.nn as nn
import torch.optim as optim

# Parse 'Date' while loading so the column holds datetimes rather than
# strings: the SQLite cell further down calls .strftime() on each date.
# dayfirst=True reads ambiguous dates as DD/MM, the same way this file is
# loaded in the past-exam solution.
df = pd.read_csv('sales_data.csv', parse_dates=['Date'], dayfirst=True)

In [None]:
# Quick cleaning: replace missing values with a per-column default, in place.
fill_defaults = {
    'Product': 'Unknown',
    'Quantity': 0,
    'Price': 0.0,
    'Total': 0.0,
}
df.fillna(value=fill_defaults, inplace=True)

# Group-by example: sum of 'Total' for each 'Month'.
# NOTE(review): assumes df already has a 'Month' column — confirm it is derived first.
monthly_sales = (
    df.groupby('Month')['Total']
    .sum()
)

# Matplotlib

In [None]:
# line plot
# Build the data inside the cell so it runs on its own; the values follow
# the relation named in the title (y = 2x).
x = np.arange(0, 11)
y = 2 * x

plt.plot(x, y)
plt.title("y = 2x")
plt.xlabel("x axis")
plt.ylabel("y axis")
plt.show()

In [None]:
# Plot straight from a pandas object through its .plot() accessor.
df.plot(kind='bar') # bar plot
df.plot(kind='line', title='Total Sales Over Time', ylabel='Total Sales', xlabel='Month') # line plot with a title and axis labels

# Or hand the index (x) and the values (y) to Matplotlib directly.
plt.plot(df.index, df.values) # note: df is expected to be a Series here, not a DataFrame

In [None]:
# subplots
# Two small data series to draw.
x1 = [20, 30, 50, 60, 80]
y1 = [10, 50, 100, 180, 200]
x2 = [30, 40, 60, 70, 90]
y2 = [20, 60, 110, 200, 220]

# 2x2 grid, first slot: first series only.
plt.subplot(2, 2, 1)
plt.plot(x1, y1)
plt.title("x1 and y1")

# Second slot: second series only.
plt.subplot(2, 2, 2)
plt.plot(x2, y2)
plt.title("x2 and y2")

# Third slot: both series on the same axes.
plt.subplot(2, 2, 3)
for xs, ys in ((x1, y1), (x2, y2)):
    plt.plot(xs, ys)
plt.title("All together")

plt.show()

In [None]:
# Bar plot: one vertical bar per programming language.
proglang = ["Python", "Java", "C", "C++", "R", "JavaScript", "C#"]
popularity = [100, 96.3, 94.4, 87.5, 81.5, 79.4, 74.5]

plt.bar(proglang, popularity) # or plt.barh(proglang, popularity) for horizontal bars
plt.title("Popularity of programming languages")
plt.show()

In [None]:
# pie chart
proglang = ["Python", "Java", "C", "C++", "R", "JavaScript", "C#"]
popularity = [100, 96.3, 94.4, 87.5, 81.5, 79.4, 74.5]
# Offset of each wedge from the centre; 0 leaves the wedge in place.
explode = [.6, 0, 0, 0, 0.4, 0.2, 0]
# One random RGB triple per language.
colours = [np.random.random(3) for _ in proglang]

plt.pie(
    popularity,
    labels=proglang,
    colors=colours,
    explode=explode,
    autopct="%1.1f%%",  # print each share with one decimal place
)
plt.title("Popularity of programming languages")
plt.show()

In [None]:
# error bar chart
# Bar heights and the matching error-bar lengths.
mean = [0.2474, 0.1235, 0.1737, 0.1824]
stdev = [0.3314, 0.2278, 0.2836, 0.2645]

plt.bar(
    ["Obs1", "Obs2", "Obs3", "Obs4"],
    mean,
    yerr=stdev,
    error_kw=dict(ecolor="0.1", capsize=4),  # dark grey error bars with caps
    alpha=0.7,
)
plt.show()

In [None]:
# stacked bar plot
# Bar heights and error-bar lengths for the two stacked series.
means_m = np.array([22, 30, 35, 35, 26])
means_f = np.array([25, 32, 30, 35, 29])
stdev_m = np.array([4, 3, 4, 1, 5])
stdev_f = np.array([3, 5, 2, 3, 3])
columns = ("Obs1", "Obs2", "Obs3", "Obs4", "Obs5")

# First series sits on the x axis.
plt.bar(columns, means_m, facecolor="blue", yerr=stdev_m, error_kw={"capsize": 4}, alpha=0.7)
# bottom=means_m starts the second series where the first one ends (stacking).
plt.bar(columns, means_f, facecolor="red", yerr=stdev_f, error_kw={"capsize": 4}, alpha=0.7, bottom=means_m)
# Show the figure explicitly, like every other plotting cell, so the chart
# also appears when the code is run as a script.
plt.show()

In [None]:
# scatter plot
# 100 random points, each with its own marker size and colour value.
x = np.random.random(100)
y = np.random.random(100)
sizes = np.random.randint(50, 500, 100)
colors = np.random.random(100)

plt.scatter(x, y, s=sizes, c=colors, alpha=0.5)
plt.show()


# Second example: marks in two subjects plotted against each other.
java_marks = [88, 92, 80, 89, 100, 80, 60, 100, 80, 34]
python_marks = [35, 79, 79, 48, 100, 88, 32, 45, 20, 30]
marks_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Both axes run from the lowest mark to 10 past the highest one.
axis_min = marks_range[0]
axis_max = marks_range[-1] + 10

plt.scatter(java_marks, python_marks)
plt.xlim(axis_min, axis_max)
plt.ylim(axis_min, axis_max)
plt.xlabel("Java Marks")
plt.ylabel("Python Marks")
plt.grid()
plt.show()

# SQLite

In [None]:
import sqlite3
# Open the connection plainly: used as a context manager, a sqlite3
# connection only commits or rolls back on exit, it does not close. The
# connection stays open for the following cells until db.close() is called.
db = sqlite3.connect('SalesDB')
cursor = db.cursor()

cursor.execute("""
    CREATE TABLE IF NOT EXISTS SALES (
    Date DATETIME PRIMARY KEY, 
    Product TEXT, 
    Quantity INTEGER, 
    Price REAL, 
    Total REAL
    );""")

# Make sure the dates are datetimes before formatting them. Values that are
# already datetimes pass through unchanged; strings are parsed day-first, the
# same way the past-exam solution loads this data.
sale_dates = pd.to_datetime(df['Date'], dayfirst=True)

for (_, row), sale_date in zip(df.iterrows(), sale_dates):
    if pd.isna(sale_date):
        # A missing date cannot be formatted or used as the primary key.
        continue
    try:
        cursor.execute("""INSERT INTO Sales (
            Date, Product, Quantity, Price, Total) 
            VALUES(?,?,?,?,?)""",
            (sale_date.strftime('%Y-%m-%d %H:%M:%S'), row['Product'], row['Quantity'], row['Price'], row['Total'])
        )
    except sqlite3.IntegrityError:
        # Date is the primary key: skip rows whose timestamp is already
        # stored. Any other error is a real problem and is left to surface
        # instead of being swallowed silently.
        continue

# One commit for the whole batch instead of one per row.
db.commit()

In [None]:
# cursor.execute("SELECT * FROM SALES")
# for x in cursor.fetchall():
#     print(x)

#OR

# Load the whole SALES table into a DataFrame through the open connection.
query = "SELECT * FROM SALES"
df = pd.read_sql_query(sql=query, con=db)

for line in ("Data Loaded and Cleaned:", df):
    print(line)

In [None]:
# Sum the Total column over every row whose Date falls in 2023.
total_sales_sql = '''
    SELECT SUM(Total) FROM Sales WHERE strftime('%Y', Date) = '2023'
'''
cursor.execute(total_sales_sql)

# The aggregate query returns a single one-column row.
first_row = cursor.fetchone()
total_sales = first_row[0]
print(f"Total Sales in 2023: {total_sales}")

In [None]:
# Units sold per product in 2023, best seller first.
# The statement is written as one triple-quoted string; the original opened
# it with a single quote and closed it with three, which only worked because
# the leftover "" was implicitly concatenated as an empty string.
cursor.execute("""
    SELECT product, sum(quantity)
    FROM SALES
    WHERE strftime('%Y', Date) = '2023'
    GROUP BY product
    ORDER BY sum(quantity) DESC
""")
for x in cursor.fetchall():
    print(x)

# Done with the database for this section.
db.close()

# PyTorch

In [None]:
class SimpleNN(nn.Module):
    """Small fully connected network: 10 inputs -> 5 -> 3 -> 1 output."""

    def __init__(self):
        super().__init__()
        # Three linear layers, narrowing from 10 features down to 1 value.
        self.fc1 = nn.Linear(in_features=10, out_features=5)
        self.fc2 = nn.Linear(in_features=5, out_features=3)
        self.fc3 = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Apply ReLU after the first two layers and a sigmoid after the last."""
        hidden_a = torch.relu(self.fc1(x))
        hidden_b = torch.relu(self.fc2(hidden_a))
        return torch.sigmoid(self.fc3(hidden_b))

# Instantiate the model, the loss function and the optimizer
model = SimpleNN()
criterion = nn.MSELoss()  # Mean Squared Error loss function
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Stochastic Gradient Descent optimizer, learning rate 0.01
# Alternative output activation for SimpleNN.forward():
#     x = torch.softmax(self.fc3(x), dim=1)

# Generate synthetic data
data = torch.randn(100, 10)  # 100 samples, 10 features each
target = torch.randn(100, 1)  # 100 target values

# Training the network (the full data set is used in every step)
losses = []
for epoch in range(20):  # Train for 20 epochs
    optimizer.zero_grad()  # Zero the gradients left over from the previous step
    output = model(data)  # Forward pass
    loss = criterion(output, target)  # Compute the loss
    loss.backward()  # Backpropagation
    optimizer.step()  # Update weights
    losses.append(loss.item())  # Record the loss as a plain Python number

# Plotting the training loss over time (one point per epoch)
plt.plot(losses)
plt.title('Training Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Past Exam Official Solution

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 26 16:04:27 2024

@author: mireilla
"""
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import torch
import torch.nn as nn
import torch.optim as optim

# --- Part 1: Python Basics and Data Manipulation ---

# Task 1a: Data Loading and Preprocessing
# Load the data and parse dates
# Load the sales file, parsing 'Date' as day-first datetimes.
df = pd.read_csv('sales_data.csv', parse_dates=['Date'], dayfirst=True)

# Data cleaning: replace missing values with a per-column default, in place.
missing_defaults = {
    'Product': 'Unknown',
    'Quantity': 0,
    'Price': 0.0,
    'Total': 0.0,
}
df.fillna(missing_defaults, inplace=True)

# Keep a datetime copy of the dates: 'Date' itself is overwritten with
# strings at the end of this part.
df['Date_original'] = df['Date']

# Monthly period derived from the datetime copy.
df['Month'] = df['Date_original'].dt.to_period('M')

# Recompute Total from quantity and unit price.
df['Total'] = df['Quantity'].mul(df['Price'])

print("Data Loaded and Cleaned:")
print(df.head())

# Task 1b: Data Visualization
# Quantity sold per product, as a bar chart.
product_sales = df.groupby('Product')['Quantity'].sum()
product_sales.plot(
    kind='bar',
    title='Product Sales Distribution',
    xlabel='Product',
    ylabel='Total Quantity Sold',
)
plt.show()

# Total sales per month, as a line chart.
monthly_sales = df.groupby('Month')['Total'].sum()
monthly_sales.plot(
    kind='line',
    title='Total Sales Over Time',
    xlabel='Month',
    ylabel='Total Sales',
)
plt.show()

# Store the dates as 'YYYY-MM-DD' strings for the database insert.
df['Date'] = df['Date_original'].dt.strftime('%Y-%m-%d')

# --- Part 2: Python Database Management ---

# Task 2a: Database Creation and Data Insertion
# Create the database
conn = sqlite3.connect('SalesDB.db')
# try/finally guarantees the connection is closed even if a statement fails.
try:
    cursor = conn.cursor()

    # Create the Sales table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS Sales (
            Date TEXT,
            Product TEXT,
            Quantity INTEGER,
            Price REAL,
            Total REAL
        )
    ''')

    # Insert all rows in one executemany() call instead of a Python-level
    # loop over iterrows(). itertuples(name=None) yields plain tuples in the
    # column order selected here.
    # NOTE: OR IGNORE only skips rows that violate a constraint; this table
    # declares none, so running the script again appends the same rows again.
    sales_rows = df[['Date', 'Product', 'Quantity', 'Price', 'Total']].itertuples(
        index=False, name=None
    )
    cursor.executemany('''
        INSERT OR IGNORE INTO Sales (Date, Product, Quantity, Price, Total)
        VALUES (?, ?, ?, ?, ?)
    ''', sales_rows)

    conn.commit()
    print("Data inserted into SalesDB.")

    # Task 2b: Querying the Database
    # Total Sales Calculation
    cursor.execute('''
        SELECT SUM(Total) FROM Sales WHERE strftime('%Y', Date) = '2023'
    ''')
    total_sales = cursor.fetchone()[0]
    print(f"Total Sales in 2023: {total_sales}")

    # Product Sales Summary: quantity per product in 2023, largest first
    cursor.execute('''
        SELECT Product, SUM(Quantity) FROM Sales 
        WHERE strftime('%Y', Date) = '2023' 
        GROUP BY Product 
        ORDER BY SUM(Quantity) DESC
    ''')
    product_summary = cursor.fetchall()
    print("Product Sales Summary in 2023:")
    for product, total_quantity in product_summary:
        print(f"Product: {product}, Total Quantity Sold: {total_quantity}")
finally:
    conn.close()

# --- Part 3: Basic Neural Network Implementation ---

# Task 3a: Neural Network Construction
# Define the neural network
class SimpleNN(nn.Module):
    """Feed-forward network with layer sizes 10 -> 5 -> 3 -> 1."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 5)  # 10 input features -> 5 neurons
        self.fc2 = nn.Linear(5, 3)   # 5 neurons -> 3 neurons
        self.fc3 = nn.Linear(3, 1)   # 3 neurons -> 1 output

    def forward(self, x):
        """Run x through the three layers: ReLU, ReLU, then sigmoid."""
        for layer in (self.fc1, self.fc2):
            x = torch.relu(layer(x))
        return torch.sigmoid(self.fc3(x))

# Instantiate the model
model = SimpleNN()
criterion = nn.MSELoss()  # mean squared error between prediction and target
optimizer = optim.SGD(model.parameters(), lr=0.01)  # plain SGD, learning rate 0.01

# Synthetic data: 100 samples of 10 features, and one target value per sample.
data = torch.randn(100, 10)
target = torch.randn(100, 1)

# Train for 20 epochs on the full data set, keeping the loss of each epoch.
losses = []
for epoch in range(20):
    optimizer.zero_grad()             # clear gradients from the previous step
    output = model(data)              # forward pass
    loss = criterion(output, target)  # compare prediction with target
    loss.backward()                   # backpropagate
    optimizer.step()                  # update the weights
    losses.append(loss.item())        # store the loss as a Python float

# Loss curve, one point per epoch.
plt.plot(losses)
plt.title('Training Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Files and Random

In [None]:
# Small text menu working on Subject.txt: create the file, display it, or
# append one more subject to it.
print("(1) Create a new file")
print("(2) Display the file")
print("(3) Add a new item to the file")
try:
    selection = int(input("Make a selection 1, 2 or 3: "))
except ValueError:
    # A non-numeric answer is reported as an invalid option below instead of
    # stopping the program with a traceback.
    selection = None

# Every file is opened in a with-block so it is closed even if a read or
# write fails.
if selection == 1:
    subject = input("Enter a school subject: ")
    with open("Subject.txt", "w") as file:  # "w" starts the file from scratch
        file.write(subject + "\n")
elif selection == 2:
    with open("Subject.txt", "r") as file:
        print(file.read())
elif selection == 3:
    subject = input("Enter a school subject: ")
    with open("Subject.txt", "a") as file:  # "a" keeps the existing content
        file.write(subject + "\n")
    # Show the file again, now including the new line.
    with open("Subject.txt", "r") as file:
        print(file.read())
else:
    print("Invalid option")

In [None]:
# random
import random
# Integer from 1 to 10, both ends included.
dice = random.randint(1, 10)
print(dice)

# Float in the range 0.0 <= value < 1.0.
fraction = random.random()
print(fraction)

# Float between 5 and 15.
measurement = random.uniform(5, 15)
print(measurement)

# One element picked from a list.
picked = random.choice([1, 2, 3, 4])
print(picked)

# shuffle() reorders the list in place and returns nothing.
nums = [1, 2, 3, 4, 5]
random.shuffle(nums)
print(nums)

# Five distinct numbers drawn from 0..99.
drawn = random.sample(range(100), 5)
print(drawn)

# Numpy

In [None]:
# Create arrays + dtype
# Arrays from Python sequences; dtype can be set explicitly.
a = np.array([1, 2, 3])
b = np.array([(1, 2, 3), (4, 5, 6)], dtype=float)
c = np.array([[1, 2], [3, 4]], dtype=np.int32)

# Constructors
z = np.zeros(shape=(2, 3))           # 2x3 of zeros
o = np.ones(shape=(2, 3))            # 2x3 of ones
eye = np.eye(3)                      # 3x3 identity matrix
diag = np.diag([1, 2, 3])            # values placed on the diagonal
r = np.arange(0, 10, 2)              # start, stop (excluded), step
l = np.linspace(0, 1, num=5)         # 5 evenly spaced values, ends included
u = np.random.random(size=(2, 2))    # 2x2 of random floats in [0, 1)

In [None]:
# Arithmetic (elementwise)
# Element-wise arithmetic on two equally sized vectors.
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
s = x + y       # element-wise sum
p = x * y       # element-wise product (not a dot product)
q = x ** 2      # each element squared
m = x + 10      # the scalar is added to every element

# Matrix product: (2x3) times (3x1) gives (2x1).
A = np.array([[0, 1, 2], [3, 4, 5]])
B = np.array([[1], [1], [1]])
M = np.dot(A, B)  # same result as A.dot(B)

In [None]:
# Vectorized funcs
# Functions applied to every element of x at once.
y = np.sin(x)
rt = np.sqrt(x)

# A comparison gives a boolean array; all()/any() reduce it to one value.
cond = x > 1
all_ok = np.all(cond)   # True only if every element satisfies the condition
any_ok = np.any(cond)   # True if at least one element does

In [None]:
# Transpose, trace, inverse
# Transpose, trace and inverse of a 2x2 matrix.
C = np.array([[1., 2.], [3., 4.]])
Ct = C.T                    # rows and columns swapped
tr = np.trace(C)            # sum of the diagonal
inv = np.linalg.inv(C)      # matrix inverse

# Shape operations
d = np.arange(12)           # 0..11 in one dimension
e = d.reshape((3, 4))       # same values arranged as 3 rows x 4 columns
f = e.ravel()               # back to one dimension
e.shape = (4, 3)  # in-place reshape: e itself becomes 4x3

In [None]:
# Stack + split
# Two 2x2 blocks to combine.
g = np.array([[1, 2], [3, 4]])
h = np.array([[5, 6], [7, 8]])

# Stacking
vs = np.vstack((g, h))          # h below g -> 4x2
hs = np.hstack((g, h))          # h to the right of g -> 2x4
cs = np.column_stack((g, h))    # for 2-D inputs the same result as hstack
# np.row_stack is a deprecated alias of np.vstack (NumPy 2.0), so vstack is
# called directly; the result is identical.
rs = np.vstack((g, h))

# Splitting
hs1, hs2 = np.hsplit(vs, 2)     # two equal column groups
vs1, vs2 = np.vsplit(vs, 2)     # two equal row groups
sp = np.split(np.arange(10), [1, 3, 8])  # cut before indices 1, 3 and 8

In [None]:
# Save/Load text
# Write a 3x2 array to a comma-separated text file and read it back.
X = np.arange(6).reshape((3, 2))
np.savetxt("data.txt", X, delimiter=",", fmt="%.3f")  # three decimals per value
Y = np.loadtxt("data.txt", delimiter=",")             # values come back as floats

In [None]:
# Random samples useful for plotting
# Seeded generator: the same numbers come out on every run.
rng = np.random.default_rng(seed=0)
x = rng.normal(loc=0, scale=1, size=100)       # 100 draws, mean 0, std 1
y = rng.normal(loc=0, scale=1, size=100)
sizes = rng.integers(low=10, high=200, size=100)  # integers from 10 up to 199

# Pandas

In [None]:
# Load data
# Two ways to load the file; the second one also parses the "date" column.
df = pd.read_csv("data.csv")                                        # basic load
df = pd.read_csv("data.csv", parse_dates=["date"], dayfirst=False)  # parse dates

# Inspect
df.head(3)                          # first rows
df.info()                           # dtypes and nulls
# describe() has no numeric_only parameter (passing it raises TypeError);
# include=[np.number] is the documented way to restrict it to numeric columns.
df.describe(include=[np.number])    # summary stats

# Types and categories
df["id"] = df["id"].astype("int64")
df["cat"] = df["cat"].astype("category")

In [None]:
# Select columns/rows
# Column selection: one name gives a Series, a list of names a DataFrame.
df["col"]
df[["a", "b"]]
df.loc[df["a"] > 0, ["a", "b"]]     # boolean row filter plus column subset
df.iloc[0:5, 0:2]                   # position-based rows and columns

# Filter and assign
passing = df["score"].ge(50)
df = df[passing]                            # keep rows with score >= 50
df["ratio"] = df["x"] / df["y"]             # new column
df = df.assign(z=lambda d: d.x + d.y)       # new column computed from the frame

# Missing values
df.isna().sum()                             # null count per column
df = df.dropna(subset=["a", "b"])           # drop rows where a or b is NaN
df["a"] = df["a"].fillna(0)                 # replace remaining NaN with 0

# Duplicates: for each (id, date) pair keep only the last row.
df = df.drop_duplicates(subset=["id", "date"], keep="last")

# Value counts and uniques
df["city"].value_counts(normalize=True)     # proportions instead of counts
df["city"].nunique()                        # number of distinct cities

# Sorting (score descending, then date ascending) and dense ranking.
df = df.sort_values(by=["score", "date"], ascending=[False, True])
df["rk"] = df["score"].rank(method="dense", ascending=False)

In [None]:
# Grouping and aggregation
# Several aggregations of one column, one result row per category.
grp = df.groupby("category")["value"].agg(["count", "mean", "sum"])

# A different aggregation for each column, one result row per city.
per_city = {
    "sales": "sum",      # total sales per city
    "orders": "mean",    # avg orders per city
    "date": "max",       # latest date per city
}
out = df.groupby("city").agg(per_city)

In [None]:
# Dates and strings
# Convert to datetimes, then read parts through the .dt accessor.
df["date"] = pd.to_datetime(df["date"])
df["year"] = df["date"].dt.year

# String clean-up through .str: trim whitespace, then Title Case.
trimmed = df["name"].str.strip()
df["name"] = trimmed.str.title()

In [None]:
# Merge / join / concatenate
left.merge(right,how="left",on="key")        # left join: keeps every row of left; the "key" column must exist in both frames
pd.concat([df1,df2],axis=0,ignore_index=True) # axis=0 appends rows (ignore_index renumbers them); use axis=1 to append columns
# Pivot: one row per city, one column per month, cells hold summed sales (0 where a combination is missing)
pd.pivot_table(df,index="city",columns="month",values="sales",aggfunc="sum",fill_value=0)