In [1]:
import pandas as pd
import spacy
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Load the spaCy pipeline once at import time; extract_info_from_sms uses it
# for named-entity recognition. Swap the model name to use a different
# language or a larger model (the named model package must be installed).
nlp = spacy.load("en_core_web_sm")

# Load NLTK's English stopword list once, as a set, so the membership test
# performed per token in preprocess_text is a constant-time lookup.
nltk_stopwords = set(stopwords.words("english"))

def load_training_data(csv_file):
    """Read the training CSV and parse its ``date_time`` column.

    Args:
        csv_file: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        The loaded DataFrame, with the ``date_time`` column converted to
        datetimes. ``format="mixed"`` lets each value be parsed with its
        own inferred format.
    """
    frame = pd.read_csv(csv_file)
    parsed_timestamps = pd.to_datetime(frame["date_time"], format="mixed")
    frame["date_time"] = parsed_timestamps
    return frame

def preprocess_text(text):
    """Lowercase *text*, tokenize it and drop stopword tokens.

    Tokenization is done with NLTK's ``word_tokenize``; any token present in
    the module-level ``nltk_stopwords`` set is discarded.

    Returns:
        The remaining tokens joined by single spaces.
    """
    lowered = text.lower()
    kept = (tok for tok in word_tokenize(lowered) if tok not in nltk_stopwords)
    return " ".join(kept)

def extract_info_from_sms(sms_text):
    """Run the spaCy pipeline on *sms_text* and return six entity texts.

    The texts of the first six named entities found by ``nlp`` are returned,
    in document order, as the tuple
    ``(date_time, amount, transaction_type, fees, account, receiver_sender)``.

    NOTE: the mapping is purely positional -- the n-th entity fills the n-th
    field regardless of its entity label -- so a field name does not
    guarantee the kind of value it holds. Customize this mapping to match
    your training-data columns.

    Args:
        sms_text: The SMS message text to analyse.

    Returns:
        A 6-tuple. Each item is the entity text as a string, or ``None``
        when the message produced fewer entities than fields (previously
        this case raised ``IndexError``).
    """
    field_count = 6
    doc = nlp(sms_text)
    # Keep at most ``field_count`` entities, in the order spaCy reports them.
    values = [ent.text for ent in doc.ents][:field_count]
    # Pad so callers can always unpack exactly six values.
    values += [None] * (field_count - len(values))
    return tuple(values)

# Example usage: load the training CSV, then preprocess one hard-coded sample
# SMS, run entity extraction on it and print the six extracted fields.
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    csv_file_path = "sms.csv"
    # Loaded here but not used by the rest of this example.
    training_data = load_training_data(csv_file_path)

    # Assuming you have an SMS message
    sample_sms = "You have received 50000 RWF from Mathieu KALIMWABO (*********316) on your mobile money account at 2024-03-04 11:40:02. Message from sender: . Your new balance:52307 RWF. Financial Transaction Id: 12989274595."
    # NOTE(review): the extractor below receives the lowercased,
    # stopword-stripped text rather than the raw SMS. This likely degrades
    # entity recognition -- confirm whether the raw message should be used.
    preprocessed_sms = preprocess_text(sample_sms)
    # The six names are bound in the order extract_info_from_sms returns them.
    date_time, amount, transaction_type, fees, account, receiver_sender, = extract_info_from_sms(preprocessed_sms)

    print("Extracted Information:")
    print(f"Date and Time: {date_time}")
    print(f"Amount: {amount}")
    print(f"Transaction Type: {transaction_type}")
    print(f"Fees: {fees}")
    print(f"Account: {account}")
    print(f"Receiver/Sender: {receiver_sender}")


Extracted Information:
Date and Time: 50000
Amount: 316
Transaction Type: 2024-03-04
Fees: 11:40:02
Account: new balance:52307
Receiver/Sender: 12989274595
