# Samsung phone rating prediction: model training, FastAPI service, and Docker packaging


## Load data




In [None]:
import pandas as pd

# Read the cleaned Samsung phone listing from the Excel workbook next to the notebook.
df = pd.read_excel('cleaned_samsung_data-1 (1).xlsx')

# Preview the first five rows to confirm the expected columns were loaded.
print(df.head(n=5))

Unnamed: 0,name,ratings,price,storage_ram,os_processor,camera,display,network,battery,ram_gb
0,"SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)",4.3,9499,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeExynos...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type4G, 3G, 2GSupported Networks4G LTE...",Battery Capacity6000 mAhDual BatteryNo,4
1,"SAMSUNG Galaxy F13 (Waterfall Blue, 128 GB)",4.3,10499,Internal Storage128 GBRAM4 GBExpandable Storag...,Operating SystemAndroid 12Processor TypeExynos...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type4G, 3G, 2GSupported Networks4G LTE...",Battery Capacity6000 mAhDual BatteryNo,4
2,"SAMSUNG Galaxy F13 (Nightsky Green, 128 GB)",4.3,10499,Internal Storage128 GBRAM4 GBExpandable Storag...,Operating SystemAndroid 12Processor TypeExynos...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type4G, 3G, 2GSupported Networks4G LTE...",Battery Capacity6000 mAhDual BatteryNo,4
3,"SAMSUNG Galaxy F13 (Sunrise Copper, 128 GB)",4.3,10499,Internal Storage128 GBRAM4 GBExpandable Storag...,Operating SystemAndroid 12Processor TypeExynos...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type4G, 3G, 2GSupported Networks4G LTE...",Battery Capacity6000 mAhDual BatteryNo,4
4,"SAMSUNG Galaxy F23 5G (Aqua Blue, 128 GB)",4.3,15499,Internal Storage128 GBRAM6 GBTotal Memory128 G...,Operating SystemAndroid 12Processor TypeQualco...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity5000 mAh,6


## Model training




In [None]:
from sklearn.linear_model import LinearRegression
import joblib

# Predictors are the listing price and RAM size; the target is the user rating.
X = df.loc[:, ['price', 'ram_gb']]
y = df.loc[:, 'ratings']

# Fit an ordinary least-squares regression (fit() returns the fitted estimator).
model = LinearRegression().fit(X, y)

# Persist the fitted estimator so the API cells can load it from disk.
joblib.dump(model, 'linear_regression_model.pkl')

print("Model training completed and model saved as linear_regression_model.pkl")

Model training completed and model saved as linear_regression_model.pkl


## API implementation




In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import pandas as pd

# Load the regression model that the training cell saved with joblib.dump.
# NOTE(review): joblib.load unpickles the file, so only load model files you produced yourself.
model = joblib.load('linear_regression_model.pkl')

# Request body schema for the /predict endpoint.
class PredictionInput(BaseModel):
    """Features supplied by the client; the field names match the columns the model is fitted on."""
    price: float
    ram_gb: int

# Application object on which the /predict route below is registered.
app = FastAPI()

# Define the prediction endpoint
@app.post("/predict")
def predict_rating(data: PredictionInput):
    """Predict a phone's rating from its price and RAM size.

    Args:
        data: Validated request body carrying ``price`` and ``ram_gb``.

    Returns:
        dict: ``{"predicted_rating": <float>}`` holding the model's single prediction.
    """
    # Build a one-row frame whose column names and order match the training features.
    input_df = pd.DataFrame({'price': [data.price], 'ram_gb': [data.ram_gb]})

    # Make prediction
    prediction = model.predict(input_df)

    # model.predict returns a NumPy array; cast the element to a built-in float so the
    # JSON response does not depend on the NumPy scalar type being serializable.
    return {"predicted_rating": float(prediction[0])}

print("FastAPI code for /predict endpoint generated.")

FastAPI code for /predict endpoint generated.


## API testing




In [None]:
import requests
import subprocess
import time

# Write the FastAPI application to main.py so uvicorn can import it as "main:app".
# The source mirrors the API implementation cell; the prediction is cast to a built-in
# float so the JSON response does not depend on the NumPy scalar type.
fastapi_code = """
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import pandas as pd

# Load the trained model
model = joblib.load('linear_regression_model.pkl')

# Define the input data structure
class PredictionInput(BaseModel):
    price: float
    ram_gb: int

# Create a FastAPI application instance
app = FastAPI()

# Define the prediction endpoint
@app.post("/predict")
def predict_rating(data: PredictionInput):
    # Convert input data to a pandas DataFrame
    input_df = pd.DataFrame({'price': [data.price], 'ram_gb': [data.ram_gb]})

    # Make prediction
    prediction = model.predict(input_df)

    # Return the prediction
    return {"predicted_rating": float(prediction[0])}
"""

with open("main.py", "w") as f:
    f.write(fastapi_code)

# Start the FastAPI application in the background.
# --reload is intentionally not used: it spawns a supervisor plus a worker process
# (terminate() may only stop the supervisor) and it watches the working directory,
# which this notebook writes files into.
process = subprocess.Popen(
    ["uvicorn", "main:app", "--host", "127.0.0.1", "--port", "8000"]
)

# Define the URL for the predict endpoint
base_url = "http://127.0.0.1:8000"
url = f"{base_url}/predict"

# Define sample data for testing
sample_data = {
    "price": 15000.0,
    "ram_gb": 6
}

try:
    # Wait until the server accepts connections instead of sleeping a fixed time.
    startup_deadline = time.monotonic() + 30
    while True:
        try:
            # Any HTTP response (even an error status) means the server is up.
            requests.get(f"{base_url}/openapi.json", timeout=2)
            break
        except requests.exceptions.ConnectionError:
            # Give up if uvicorn has already exited or the startup budget is spent;
            # the outer handler reports the failure.
            if process.poll() is not None or time.monotonic() >= startup_deadline:
                raise
            time.sleep(0.25)

    # Send a POST request to the endpoint (bounded so a hung server cannot block the cell)
    response = requests.post(url, json=sample_data, timeout=10)
    response.raise_for_status() # Raise an exception for bad status codes
    prediction_result = response.json()

    # Verify the response
    print("API Response:", prediction_result)
    if 'predicted_rating' in prediction_result and isinstance(prediction_result['predicted_rating'], (int, float)):
        print("Test passed: Received a valid numerical prediction.")
    else:
        print("Test failed: Response does not contain a valid numerical prediction.")

except requests.exceptions.RequestException as e:
    print(f"Test failed: Could not connect to the API or received an error: {e}")

finally:
    # Stop the FastAPI process; escalate to kill if it ignores the terminate request.
    process.terminate()
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()
    print("FastAPI process terminated.")

API Response: {'predicted_rating': 4.207876406701569}
Test passed: Received a valid numerical prediction.
FastAPI process terminated.


## Dockerization




In [None]:
# Create Dockerfile
import sys
from importlib import metadata


def _pinned(package):
    """Return ``package==<version installed in this kernel>``, or the bare name if it is not installed here."""
    try:
        return f"{package}=={metadata.version(package)}"
    except metadata.PackageNotFoundError:
        return package


# Mirror the environment that pickled the model: a pickled scikit-learn estimator is
# only reliably loadable with the library version that produced it, and those pinned
# versions are only guaranteed to install on the same Python minor release.
python_tag = f"{sys.version_info.major}.{sys.version_info.minor}"
requirements = " ".join(
    _pinned(package)
    for package in ("fastapi", "uvicorn", "pandas", "scikit-learn", "joblib")
)

# Dependencies are installed before the application files are copied so that editing
# main.py or retraining the model does not invalidate the cached dependency layer.
dockerfile_content = f"""
FROM python:{python_tag}-slim

WORKDIR /app

RUN pip install --no-cache-dir {requirements}

COPY main.py .
COPY linear_regression_model.pkl .

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
"""

with open("Dockerfile", "w") as f:
    f.write(dockerfile_content)

print("Dockerfile created successfully.")

Dockerfile created successfully.


## Docker build and run




In [None]:
import shutil
import subprocess
import time

IMAGE_NAME = "fastapi-ml-app"
# A fixed container name lets a re-run replace the previous container instead of
# failing because port 8000 is still allocated to it.
CONTAINER_NAME = "fastapi-ml-app"

if shutil.which("docker") is None:
    # Without the CLI every subprocess call below would raise FileNotFoundError.
    print("Docker CLI not found on PATH; skipping image build and container start.")
else:
    # Build the Docker image
    print("Building Docker image...")
    build_process = subprocess.run(
        ["docker", "build", "-t", IMAGE_NAME, "."],
        capture_output=True,
        text=True,
        encoding="utf-8"
    )
    print(build_process.stdout)

    # Docker writes its normal build log to stderr, so success is decided by the
    # exit code rather than by whether stderr is empty.
    if build_process.returncode != 0:
        print("Build Error:", build_process.stderr)
    else:
        if build_process.stderr:
            print(build_process.stderr)

        # Remove a container left over from a previous run; a failure here simply
        # means there was nothing to remove.
        subprocess.run(
            ["docker", "rm", "-f", CONTAINER_NAME],
            capture_output=True,
            text=True,
            encoding="utf-8"
        )

        # Run the Docker container
        print("Running Docker container...")
        # Use -d to run in detached mode (in the background)
        run_process = subprocess.run(
            ["docker", "run", "-d", "--name", CONTAINER_NAME, "-p", "8000:8000", IMAGE_NAME],
            capture_output=True,
            text=True,
            encoding="utf-8"
        )
        print(run_process.stdout)

        if run_process.returncode != 0:
            print("Run Error:", run_process.stderr)
        else:
            # `docker run -d` prints the ID of the container it started.
            container_id = run_process.stdout.strip()

            # Give the container some time to start (or to crash on startup)
            time.sleep(5)

            # Verify that this specific container is still running
            print("Verifying container is running...")
            check_process = subprocess.run(
                ["docker", "ps", "--filter", f"id={container_id}"],
                capture_output=True,
                text=True,
                encoding="utf-8"
            )
            print(check_process.stdout)

            # `docker ps` lists the 12-character short form of the container ID.
            if container_id and container_id[:12] in check_process.stdout:
                print("Container is running successfully.")
            else:
                print("Failed to run container.")


Building Docker image...


FileNotFoundError: [Errno 2] No such file or directory: 'docker'

## Container testing




In [None]:
import requests
import time

# Endpoint exposed by the Docker container; assumes container port 8000 is
# published on the host's port 8000.
url = "http://127.0.0.1:8000/predict"

# Example request body used for the check
sample_data = dict(price=15000.0, ram_gb=6)

# POST the sample to the containerized API and report the outcome
try:
    print(f"Sending POST request to {url} with data: {sample_data}")
    # Bound the request so an unresponsive container cannot hang the cell
    response = requests.post(url, json=sample_data, timeout=10)
    # Turn 4xx/5xx responses into exceptions handled below
    response.raise_for_status()

    prediction_result = response.json()

    # The response must carry a numeric 'predicted_rating'
    print("API Response:", prediction_result)
    rating_is_numeric = (
        'predicted_rating' in prediction_result
        and isinstance(prediction_result['predicted_rating'], (int, float))
    )
    print(
        "Test passed: Received a valid numerical prediction."
        if rating_is_numeric
        else "Test failed: Response does not contain a valid numerical prediction or the value is not a number."
    )

# Handlers run from most specific to most general
except requests.exceptions.Timeout:
    print(f"Test failed: Request timed out after 10 seconds. The API might not be running or accessible at {url}.")
except requests.exceptions.ConnectionError:
    print(f"Test failed: Could not connect to the API at {url}. Make sure the Docker container is running and the port is correctly mapped.")
except requests.exceptions.RequestException as e:
    print(f"Test failed: An error occurred during the request: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Sending POST request to http://127.0.0.1:8000/predict with data: {'price': 15000.0, 'ram_gb': 6}
Test failed: Could not connect to the API at http://127.0.0.1:8000/predict. Make sure the Docker container is running and the port is correctly mapped.


## Summary:

### Data Analysis Key Findings

*   A Linear Regression model was trained using 'price' and 'ram\_gb' as features to predict 'ratings'.
*   The trained model was successfully saved to a file named `linear_regression_model.pkl`.
*   A FastAPI application with a `/predict` endpoint was successfully implemented to load the trained model and make predictions based on provided 'price' and 'ram\_gb' inputs.
*   Local testing of the FastAPI application's `/predict` endpoint was successful, showing that it could accept input data and return a numerical prediction (e.g., `{'predicted_rating': 4.207876406701569}`).
*   A Dockerfile was successfully created to containerize the FastAPI application, including dependencies, the application code (`main.py`), and the trained model file (`linear_regression_model.pkl`).
*   Attempts to build and run the Docker image and subsequently test the API within the container failed because the `docker` command was not found in the execution environment.

### Insights or Next Steps

*   The core components of the task (model training, API development, and Dockerfile creation) were successfully completed individually.
*   To fully complete the task, the Docker environment needs to be accessible to build and run the container, allowing for testing the deployed API.
