<a href="https://colab.research.google.com/github/bruhseriously/AIR/blob/main/LightGBM_Llama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GPT-neo

In [None]:
!pip install lightgbm
!pip install transformers

In [None]:
import lightgbm as lgb
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Download (or load from the local cache) the GPT-Neo 2.7B weights and the
# matching tokenizer; both must come from the same checkpoint.
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

In [None]:
def generate_response(input_text, max_new_tokens=64):
    """
    Generate a text continuation of ``input_text`` with the loaded causal LM.

    This only produces free text; turning that text into structured fields is
    left to the caller.

    Args:
        input_text: Prompt string fed to the model.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Passed explicitly so the reply is not cut off by the
            generation config's default total-length limit.

    Returns:
        The decoded first output sequence with special tokens removed.
    """
    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # Reuse EOS as the padding id so generate() does not have to pick one.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Free-text request as it would arrive from a guest.
guest_request = "Please delay vessel XYZ by 2 hours due to a storm."

# Hand-written stand-in for the fields that would normally be parsed out of the
# language model's reply; no model call happens in this cell.
structured_data = {
    'vessel_id': 'XYZ',
    'delay_hours': 2,
    'priority_reason': 1,  # High priority due to storm
    'customer_importance': 3  # High customer importance
}

# Show the raw request followed by the mocked extraction result.
print(f"Guest Request: {guest_request}")
print(f"Structured Data Extracted: {structured_data}")

In [None]:
# Train a small LightGBM model that scores vessel priority (in practice this
# would be pre-trained and loaded), then score the incoming request.
import pandas as pd

# Columns the model is trained on; vessel_id is an identifier, not a feature.
FEATURE_COLUMNS = ['delay_hours', 'priority_reason', 'customer_importance']

# Toy training data for LightGBM
data = {
    'vessel_id': [1, 2, 3, 4, 5],
    'delay_hours': [2, 5, 1, 3, 4],
    'priority_reason': [0, 1, 0, 1, 1],  # 0: Low priority, 1: High priority
    'customer_importance': [3, 2, 1, 3, 2]  # High customer importance for some
}
df = pd.DataFrame(data)

# Features and target (1: high priority, 0: low priority)
X = df[FEATURE_COLUMNS]
y = np.array([1, 0, 1, 0, 1])

# Train LightGBM model
train_data = lgb.Dataset(X, label=y)
params = {
    'objective': 'binary',
    'metric': 'binary_error',
    # LightGBM's defaults (min_data_in_leaf=20, min_data_in_bin=3) rule out every
    # split on a 5-row dataset, which leaves a constant predictor that ignores
    # the features. Relax them so this toy model can actually learn.
    'min_data_in_leaf': 1,
    'min_data_in_bin': 1,
    'seed': 42,     # reproducible across re-runs
    'verbose': -1,  # silence per-iteration training logs
}
model_lgb = lgb.train(params, train_data, num_boost_round=100)

# 4. Rank the incoming vessel based on the structured data (delay, priority, customer importance).
# Build the input with the same named columns, in the same order, as the training frame.
X_input = pd.DataFrame([{col: structured_data[col] for col in FEATURE_COLUMNS}])

# Predict the priority (higher value = higher priority); one score per input row.
priority = model_lgb.predict(X_input)
print(f"LightGBM Model Priority Score for Vessel {structured_data['vessel_id']}: {priority[0]}")

In [None]:
# 5. Simulate rescheduling decision based on LightGBM's priority
# `priority` holds one score per scored row; take the single request's score as a
# plain float instead of relying on the truth value of a one-element array.
PRIORITY_THRESHOLD = 0.5  # scores above this are treated as high priority
priority_score = float(priority[0])

if priority_score > PRIORITY_THRESHOLD:
    reschedule_message = f"Vessel {structured_data['vessel_id']} is moved to higher priority due to the storm and will be delayed by {structured_data['delay_hours']} hours."
else:
    reschedule_message = f"Vessel {structured_data['vessel_id']} remains on the current schedule."

# 6. Output the final decision (rescheduling)
print(f"Rescheduling Decision: {reschedule_message}")


# Llama

In [None]:
!pip install sentencepiece #library for text tokenization
!pip install -U bitsandbytes #library for loading and managing models
!pip install -q transformers torch accelerate #libraries for working with HF models and Pytorch
!pip install lightgbm

import lightgbm as lgb
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
from threading import Thread
import numpy as np

In [None]:
# Load the Llama 3.1 8B Instruct checkpoint in 4-bit form, plus its tokenizer.
model_id = "NousResearch/Meta-Llama-3.1-8B-Instruct"
device = "cuda"  # GPU device string

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" lets the loader decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

def generate_llama_response(input_text, max_new_tokens=4096):
    """
    Generate a text reply to ``input_text`` with the loaded LLaMA model.

    This only produces free text; turning that text into structured fields is
    left to the caller.

    Args:
        input_text: Prompt string fed to the model.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Forwarded to ``model.generate`` so the caller's limit
            actually takes effect.

    Returns:
        The decoded first output sequence with special tokens removed.
    """
    # Follow the model's own device rather than a hard-coded device string, so
    # the inputs land wherever the loader placed the weights.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # Reuse EOS as the padding id so generate() does not have to pick one.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Sample free-text request, echoed before the (slow) generation step starts.
guest_request = "Please delay vessel XYZ by 2 hours due to a storm."
print(f"Guest Request: {guest_request}")

# Send the raw request to the model and show whatever text comes back.
llama_response = generate_llama_response(input_text=guest_request)
print(f"LLaMA Response: {llama_response}")

In [None]:
# Train a small LightGBM priority model on toy data and score the incoming request.
import pandas as pd

# Columns the model is trained on; vessel_id is an identifier, not a feature.
FEATURE_COLUMNS = ['delay_hours', 'priority_reason', 'customer_importance']

# Simulated structured data based on LLaMA response (you'd normally parse the response)
structured_data = {
    'vessel_id': 'XYZ',
    'delay_hours': 2,
    'priority_reason': 1,  # High priority due to storm
    'customer_importance': 3  # High customer importance
}

# Toy training data for LightGBM
data = {
    'vessel_id': [1, 2, 3, 4, 5],
    'delay_hours': [2, 5, 1, 3, 4],
    'priority_reason': [0, 1, 0, 1, 1],  # 0: Low priority, 1: High priority
    'customer_importance': [3, 2, 1, 3, 2]  # High customer importance for some
}

# Training LightGBM Model
df = pd.DataFrame(data)
X = df[FEATURE_COLUMNS]
y = np.array([1, 0, 1, 0, 1])  # 1: high priority, 0: low priority

train_data = lgb.Dataset(X, label=y)
params = {
    'objective': 'binary',
    'metric': 'binary_error',
    # LightGBM's defaults (min_data_in_leaf=20, min_data_in_bin=3) rule out every
    # split on a 5-row dataset, which leaves a constant predictor that ignores
    # the features. Relax them so this toy model can actually learn.
    'min_data_in_leaf': 1,
    'min_data_in_bin': 1,
    'seed': 42,     # reproducible across re-runs
    'verbose': -1,  # silence per-iteration training logs
}
lgb_model = lgb.train(params, train_data, num_boost_round=100)

# Predict the priority of the incoming request using LightGBM.
# Build the input with the same named columns, in the same order, as the training frame.
X_input = pd.DataFrame([{col: structured_data[col] for col in FEATURE_COLUMNS}])

priority = lgb_model.predict(X_input)  # one score per input row
print(f"LightGBM Priority Score: {priority[0]}")

In [None]:
# Simulate rescheduling decision
# `priority` holds one score per scored row; take the single request's score as a
# plain float instead of relying on the truth value of a one-element array.
PRIORITY_THRESHOLD = 0.5  # scores above this are treated as high priority
priority_score = float(priority[0])

if priority_score > PRIORITY_THRESHOLD:
    reschedule_message = f"Vessel {structured_data['vessel_id']} moved to higher priority."
else:
    reschedule_message = f"Vessel {structured_data['vessel_id']} remains on current schedule."

print(f"Rescheduling Decision: {reschedule_message}")