# AI/ML Internship Tasks

### Completed Tasks: Task 1, Task 4, Task 6

## Task 1: Exploring and Visualizing a Simple Dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the Iris dataset through seaborn's sample-dataset loader
iris = sns.load_dataset('iris')

# Basic inspection: dimensions, column labels and a preview of the rows
print("Shape:", iris.shape)
print("Columns:", iris.columns)
print("First 5 rows:\n", iris.head())

# Info and Description
print("\nInfo:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
iris.info()
print("\nDescription:")
print(iris.describe())

In [None]:
# Scatter plot of sepal length against petal length, coloured by species
scatter_ax = sns.scatterplot(
    x='sepal_length',
    y='petal_length',
    hue='species',
    data=iris,
)
scatter_ax.set_title("Sepal Length vs Petal Length")
plt.show()

In [None]:
# Histograms: one panel per column that DataFrame.hist() can plot
hist_figsize = (10, 8)
iris.hist(figsize=hist_figsize)
# The figure created by hist() is the current one, so title it via gcf()
plt.gcf().suptitle("Histograms of Iris Features")
plt.show()

In [None]:
# Box plots of the dataset's columns, drawn on a single set of axes
box_ax = sns.boxplot(data=iris)
box_ax.set_title("Boxplot of Iris Features")
# Tilt the x tick labels by 45 degrees
plt.xticks(rotation=45)
plt.show()

## Task 4: General Health Query Chatbot (Prompt Engineering)

In [None]:
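# NOTE: Requires an OpenAI API key to run. The commented-out example below uses
# the legacy `openai.ChatCompletion` interface, which is only available in
# openai<1.0.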
# import openai
# openai.api_key = 'your-api-key-here'

# def health_chatbot(query):
#     prompt = f"Act like a helpful medical assistant. {query}"
#     response = openai.ChatCompletion.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}]
#     )
#     return response['choices'][0]['message']['content']

# Example queries:
# print(health_chatbot("What causes a sore throat?"))
# print(health_chatbot("Is paracetamol safe for children?"))

## Task 6: House Price Prediction

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Load and prepare data
# `sklearn.datasets.load_boston` was removed in scikit-learn 1.2, so importing
# it fails on current releases. The same dataset is read from its original
# source instead, following the replacement snippet given in the scikit-learn
# documentation. This requires network access.
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
BOSTON_FEATURE_NAMES = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]

# The file has a 22-line text header, and each record is wrapped over two
# physical lines: 11 values on the first, 3 on the second (the last of which
# is the target, the median house value).
raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
feature_matrix = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

df = pd.DataFrame(feature_matrix, columns=BOSTON_FEATURE_NAMES)
df['PRICE'] = target

# Features and target
X = df.drop('PRICE', axis=1)
y = df['PRICE']

# Train/test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("\nMAE:", mean_absolute_error(y_test, y_pred))
# The `squared=False` argument of mean_squared_error is deprecated and removed
# in recent scikit-learn releases; taking the square root of the MSE gives the
# same RMSE on every version.
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

In [None]:
# Actual vs predicted prices on the held-out test set
figure_size = (8, 6)
plt.figure(figsize=figure_size)
plt.scatter(y_test, y_pred)

# Dashed red y = x reference line spanning the full range of the target
price_bounds = [y.min(), y.max()]
plt.plot(price_bounds, price_bounds, '--r')

plt.title("Actual vs Predicted House Prices")
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.show()