In [1]:
pip install pandas scikit-learn tensorflow fastapi uvicorn[standard]


Collecting fastapi
  Downloading fastapi-0.110.1-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]
  Downloading uvicorn-0.29.0-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting starlette<0.38.0,>=0.37.2 (from fastapi)
  Downloading starlette-0.37.2-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting h11>=0.8 (from uvicorn[standard])
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httptools>=0.5.0 (from uvicorn[standard])
  Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17

In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from fastapi import FastAPI
from pydantic import BaseModel

# Load the dataset (assuming the dataset is in JSON format)
# Replace 'dataset.json' with the actual file path
data = pd.read_json('dataset.json')

# Print column names
print(data.columns)

# Data Preprocessing
# Assuming the dataset has 'external_status' and 'internal_status' columns
# Cleaning and formatting the external status descriptions
data['externalStatus'] = data['externalStatus'].apply(lambda x: x.lower())  # Convert to lowercase
data['externalStatus'] = data['externalStatus'].str.replace('[^a-zA-Z\s]', '')  # Remove non-alphabetic characters

# Encode internal status labels
label_encoder = LabelEncoder()
data['internal_status_encoded'] = label_encoder.fit_transform(data['internalStatus'])

# Split data into features and target
X = data['externalStatus']
y = data['internal_status_encoded']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Convert external status descriptions to numerical data using Tokenization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Tokenize the external status descriptions
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

# Pad sequences to ensure uniform length
max_length = max([len(seq) for seq in X_train_sequences])
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_length, padding='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_length, padding='post')

# Define the TensorFlow model
model = Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=64, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train_padded, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test_padded, y_test)
print("Accuracy:", accuracy)
# Define the TensorFlow model
model = Sequential([
    Dense(64, activation='relu', input_shape=(1,)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


# API Development
# Define FastAPI app
app = FastAPI()

# Define request body model
class Item(BaseModel):
    external_status: str

# Define API endpoint
@app.post("/predict/")
async def predict_internal_status(item: Item):
    external_status = item.external_status.lower().replace('[^a-zA-Z\s]', '')
    encoded_status = label_encoder.transform([external_status])
    prediction = model.predict(encoded_status)
    predicted_label = label_encoder.inverse_transform([np.round(prediction)])
    return {"predicted_internal_status": predicted_label[0]}

# Testing and Validation:
# Test the API thoroughly to ensure functionality and accuracy.

# Documentation:
# Comments within the code explain each step and provide context for future reference.


Index(['externalStatus', 'internalStatus'], dtype='object')
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.2448979616165161


In [8]:
pip install uvicorn


