## Download model with wget

In [5]:
# run once:

# !wget https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin

# wget does not work under uv run - so I have to use something like requests:
# import requests

# url = 'https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin'
# response = requests.get(url)

# with open("pipeline_v1.bin", "wb") as f:
#     f.write(response.content)

# Imports

import pickle

# Load the fitted pipeline (DictVectorizer + LogisticRegression) from the pickle file.
# NOTE(security): pickle.load can execute arbitrary code while deserializing, so only
# unpickle files that come from a source you trust.

with open('pipeline_v1.bin', 'rb') as model_file:
    pipeline = pickle.load(model_file)

# Last expression of the cell: show the pipeline's repr as the cell output.
pipeline

# version conflict appears, so plan B is to go back to 
# requires-python = ">=3.12,<3.13" in toml
# uv venv --python 3.12
# uv sync
# uv add scikit-learn==1.6.1
# and then
# uv sync
# 
# or use my local python on windows - it is 3.12 - and skip uv ))

# Q1 - UV version - uv 0.9.0 (39b688653 2025-10-07)
# Q2 - scikit-learn (SKL) hash:
# sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e

0,1,2
,steps,"[('dictvectorizer', ...), ('logisticregression', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,dtype,<class 'numpy.float64'>
,separator,'='
,sparse,True
,sort,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,100


## Question 3

In [7]:
# Question 3
# Let's use the model!

# Write a script for loading the pipeline with pickle
# Score this record:
# {
#     "lead_source": "paid_ads",
#     "number_of_courses_viewed": 2,
#     "annual_income": 79276.0
# }
# What's the probability that this lead will convert?

# 0.333
# 0.533 == 0.534
# 0.733
# 0.933
# If you're getting errors when unpickling the files, check their checksum:

# $ md5sum pipeline_v1.bin
# 7d17d2e4dfbaf1e408e1a62e6e880d49 *pipeline_v1.bin


# Score a single lead with the loaded pipeline / model.

datapoint = dict(
    lead_source="paid_ads",
    number_of_courses_viewed=2,
    annual_income=79276.0,
)

# predict_proba yields one row per record; row 0, column 1 is reported below
# as the probability that the lead converts.
class_probabilities = pipeline.predict_proba(datapoint)
result = class_probabilities[0, 1]
print(f'Probability that this lead will convert: {result:.3f}')
# Probability that this lead will convert: 0.534

Probability that this lead will convert: 0.534


## Question 4

In [9]:
# Question 4
# Now let's serve this model as a web service

# Install FastAPI
# Write FastAPI code for serving the model
# Now score this client using requests:
# url = "YOUR_URL"
# client = {
#     "lead_source": "organic_search",
#     "number_of_courses_viewed": 4,
#     "annual_income": 80304.0
# }
# requests.post(url, json=client).json()
# What's the probability that this client will get a subscription?

# 0.334
# 0.534 == 0.534041
# 0.734
# 0.934

In [12]:
# created file predict_hw5.py 
# import pickle
# from typing import Literal
# from fastapi import FastAPI
# from pydantic import BaseModel
# import uvicorn


# class Customer(BaseModel):
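#     # NOTE(review): "paid_ads" (the lead_source scored in Question 3) is not listed in this
#     # Literal, so the service would presumably reject that record with a validation
#     # error - confirm the allowed values against the training data.
#     lead_source: Literal["organic_search", "ads", "referral", "social_media"]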
#     number_of_courses_viewed: int
#     annual_income: float


# class PredictResponse(BaseModel):
#     conversion_probability: float
#     convert: bool


# app = FastAPI(title="lead-conversion-prediction")

# with open("pipeline_v1.bin", "rb") as f:
#     model = pickle.load(f)


# def predict_single(data):
#     return float(model.predict_proba([data])[0, 1])


# @app.post("/predict", response_model=PredictResponse)
# def predict(customer: Customer):
#     prob = predict_single(customer.model_dump())
#     return {"conversion_probability": prob, "convert": prob >= 0.5}


# if __name__ == "__main__":
#     uvicorn.run(app, host="0.0.0.0", port=9696)

# run it using << uv run uvicorn predict_hw5:app --host 0.0.0.0 --port 9696 --reload >>
# check if my fastapi app is running and play with api here << http://localhost:9696/docs >>

import requests

# Score one client against the locally running FastAPI service.
# The service must already be up (see the uvicorn command above), otherwise
# the POST fails with a connection error.
url = 'http://localhost:9696/predict'

client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

# timeout: without it, requests waits indefinitely if the server accepts the
# connection but never answers, which would hang the notebook kernel.
response = requests.post(url, json=client, timeout=10)

# Fail loudly on 4xx/5xx instead of treating an error payload as a prediction.
response.raise_for_status()

predictions = response.json()
predictions
# {'conversion_probability': 0.5340417283801275, 'convert': True}


{'conversion_probability': 0.5340417283801275, 'convert': True}

## Question 5

In [14]:
# Question 5
# Download the base image agrigorev/zoomcamp-model:2025. You can easily make it by using docker pull command.

# So what's the size of this base image?

# 45 MB
# 121 MB 
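# 245 MB == 181.12 MB - higher value...
# NOTE(review): 181.12 MB is 60.12 MB away from 121 MB and 63.88 MB away from 245 MB, so it is
# not exactly in between; by the "closest one" rule below, 121 MB is the nearer option - re-check this answer.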
# 330 MB
# You can get this information when running docker images - it'll be in the "SIZE" column.
# If your answer doesn't match options exactly, select the closest one. If the answer is exactly in between two options, select the higher value.

# Digest: sha256:14d79fde0bbf078eb18c99c2bd007205917b758ec11060b2994963a1e485c2ae
# Status: Downloaded newer image for agrigorev/zoomcamp-model:2025
# docker.io/agrigorev/zoomcamp-model:2025
# agrigorev/zoomcamp-model:2025
# 14d79fde0bbf

# CREATED
# 7 days ago

# SIZE
# 181.12 MB
# agrigorev/zoomcamp-model   2025      14d79fde0bbf   6 days ago    181MB

## Question 6

In [20]:
# Question 6
# Dockerfile
# Now create your own Dockerfile based on the image we prepared.

# It should start like that:

# FROM agrigorev/zoomcamp-model:2025
# # add your stuff here
# Now complete it:

# Install all the dependencies from pyproject.toml
# Copy your FastAPI script
# Run it with uvicorn
# After that, you can build your docker image.

# Let's run your docker container!

# After running it, score this client once again:

# url = "YOUR_URL"
# client = {
#     "lead_source": "organic_search",
#     "number_of_courses_viewed": 4,
#     "annual_income": 80304.0
# }
# requests.post(url, json=client).json()
# What's the probability that this lead will convert?

# 0.39
# 0.59 == 0.534
# 0.79
# 0.99

# Score the same client again, this time against the service running inside
# the Docker container (published on port 9696).
url = 'http://localhost:9696/predict'

client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

# timeout: keeps the kernel from hanging forever if the container accepts the
# connection but never responds.
docker_response = requests.post(url, json=client, timeout=10)

# Surface HTTP errors (4xx/5xx) instead of displaying an error payload as if
# it were a prediction.
docker_response.raise_for_status()

docker_response.json()
# {'conversion_probability': 0.5340417283801275, 'convert': True}

{'conversion_probability': 0.5340417283801275, 'convert': True}

In [None]:
# # Dockerfile

# # Created new dockerfile for homework week 5 specifically
# # Question 6 - it should start like that:

# FROM agrigorev/zoomcamp-model:2025

# COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# WORKDIR /code

# ENV PATH="/code/.venv/bin:$PATH"

# # Install all the dependencies from pyproject.toml
# COPY "pyproject.toml" "uv.lock" ".python-version" ./

# # Actually install dependencies 
# RUN uv sync 

# # Copy your FastAPI script
# # Run it with uvicorn
# # After that, you can build your docker image.

# # Copy application code and model data into the container
# COPY "predict_hw5.py" "pipeline_v1.bin" ./

# # Expose TCP port 9696 so it can be accessed from outside the container
# EXPOSE 9696
# # Run the application using uvicorn (ASGI server)
# # predict_hw5:app → refers to 'app' object inside predict_hw5.py
# # --host 0.0.0.0 → listen on all interfaces
# # --port 9696    → listen on port 9696
# ENTRYPOINT ["uvicorn", "predict_hw5:app", "--host", "0.0.0.0", "--port", "9696"]

# # Build it:

# # docker build -t predict-convert .

# # And run it:

# # docker run -it --rm -p 9696:9696 predict-convert