<a href="https://colab.research.google.com/github/chinedm/Alert-System-for-Safety-in-Taxi-Rides/blob/main/Drive_Alert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import librosa


In [None]:
import nara_wpe

In [None]:
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [None]:
import pyannote.audio
from datasets import load_dataset
import csv

  torchaudio.set_audio_backend("soundfile")


In [None]:
!pip install pyannote.audio

In [None]:
# instantiate the model
import os

from pyannote.audio import Model

# The access token is read from the HF_TOKEN environment variable so that no
# secret is ever stored in the notebook. When the variable is unset, None is
# passed and any locally cached Hugging Face login is used instead.
model = Model.from_pretrained(
  "pyannote/segmentation-3.0",
  use_auth_token=os.environ.get("HF_TOKEN"))


In [None]:
pip install pyannote.audio --extra-index-url https://huggingface.co/pytorch/api/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

In [None]:
import nlpaug.augmenter.word as naw
import torch
import torch.nn as nn
import smtplib
from email.mime.text import MIMEText
import requests
import sqlite3
from flask import Flask, render_template, request

RuntimeError: Failed to import transformers.pipelines because of the following error (look up to see its traceback):
DLL load failed while importing _pywrap_tf2: A dynamic link library (DLL) initialization routine failed.

In [None]:
!pip install nlpaug

In [None]:
# Data Preprocessing
def remove_noise(audio_data, sampling_rate, taps=10, delay=3, iterations=3,
                 stft_size=512, stft_shift=128):
    """Enhance audio with Weighted Prediction Error (WPE) dereverberation.

    nara_wpe exposes WPE as a function operating on STFT frames, so the signal
    is transformed to the STFT domain, filtered, and transformed back.

    Args:
        audio_data: Array-like time-domain signal, either ``(samples,)`` for
            mono or ``(channels, samples)`` for multi-channel audio.
        sampling_rate: Sampling rate in Hz. Not used by the WPE computation;
            kept so the signature matches the other preprocessing steps.
        taps: Number of filter taps passed to ``wpe``.
        delay: Prediction delay (in frames) passed to ``wpe``.
        iterations: Number of WPE iterations.
        stft_size: STFT window size in samples.
        stft_shift: STFT hop size in samples.

    Returns:
        numpy.ndarray with the same number of dimensions and the same number
        of samples as the input.
    """
    # Imported here because a bare `import nara_wpe` does not guarantee that
    # the `wpe` and `utils` submodules are loaded.
    from nara_wpe.utils import istft, stft
    from nara_wpe.wpe import wpe

    signal = np.asarray(audio_data)
    if signal.size == 0:
        # Nothing to enhance; avoid running the STFT on an empty buffer.
        return signal

    is_mono = signal.ndim == 1
    if is_mono:
        # The STFT/WPE pipeline below works on (channels, samples).
        signal = signal[np.newaxis, :]
    num_samples = signal.shape[-1]

    # stft yields (channels, frames, bins); wpe expects (bins, channels, frames).
    spectrum = stft(signal, size=stft_size, shift=stft_shift).transpose(2, 0, 1)
    dereverberated = wpe(
        spectrum,
        taps=taps,
        delay=delay,
        iterations=iterations,
        statistics_mode="full",
    ).transpose(1, 2, 0)
    enhanced_audio = istft(dereverberated, size=stft_size, shift=stft_shift)

    # The inverse STFT may return a slightly different length than the input;
    # trim or zero-pad so callers get a signal aligned with what they passed in.
    enhanced_audio = enhanced_audio[..., :num_samples]
    shortfall = num_samples - enhanced_audio.shape[-1]
    if shortfall > 0:
        padding = [(0, 0)] * (enhanced_audio.ndim - 1) + [(0, shortfall)]
        enhanced_audio = np.pad(enhanced_audio, padding)

    return enhanced_audio[0] if is_mono else enhanced_audio

def diarize_speakers(audio_data, sampling_rate, pipeline=None):
    """Run speaker diarization on an in-memory audio signal.

    Args:
        audio_data: Array-like waveform, ``(samples,)`` or
            ``(channels, samples)``.
        sampling_rate: Sampling rate of ``audio_data`` in Hz.
        pipeline: Optional, already-loaded diarization pipeline (any callable
            accepting a ``{"waveform", "sample_rate"}`` mapping). When omitted,
            the pretrained ``pyannote/speaker-diarization-3.1`` pipeline is
            loaded, which is slow and needs a Hugging Face token - pass a
            preloaded pipeline when calling this repeatedly.

    Returns:
        Whatever the pipeline returns for the given audio (for the pretrained
        pyannote pipeline, its diarization result).
    """
    # Local import so this function does not depend on the order in which the
    # notebook's import cells were executed.
    import torch

    if pipeline is None:
        import os

        from pyannote.audio import Pipeline

        # Token comes from the environment so no secret lives in the notebook.
        pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=os.environ.get("HF_TOKEN"),
        )

    waveform = torch.as_tensor(np.asarray(audio_data), dtype=torch.float32)
    if waveform.ndim == 1:
        # In-memory input is passed as (channel, time).
        waveform = waveform.unsqueeze(0)

    speakers = pipeline({"waveform": waveform, "sample_rate": sampling_rate})
    return speakers

def transcribe_audio(audio_data, sampling_rate, model, processor):
    """Transcribe speech with a Speech2Text model and its processor.

    The processor turns the raw waveform into model features, the model
    generates token ids, and the processor decodes them back into text.

    Args:
        audio_data: Raw waveform to transcribe.
            NOTE(review): presumably mono audio at the sampling rate the
            checkpoint was trained on - confirm against the caller.
        sampling_rate: Sampling rate of ``audio_data`` in Hz; forwarded to the
            processor.
        model: Object exposing ``generate(input_features, attention_mask=...)``
            (e.g. ``Speech2TextForConditionalGeneration``).
        processor: Object that is callable on the waveform and exposes
            ``batch_decode`` (e.g. ``Speech2TextProcessor``).

    Returns:
        str: The decoded transcription of the first (only) utterance, or an
        empty string when nothing was decoded.
    """
    # Automatic Speech Recognition (ASR)
    inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"],
        attention_mask=inputs["attention_mask"],
    )
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    # A single waveform produces a one-element batch.
    transcription = decoded[0] if decoded else ""
    return transcription

def preprocess_audio(audio_data, sampling_rate, model=None, processor=None):
    """Run the full preprocessing chain: enhancement, diarization, ASR.

    Args:
        audio_data: Raw waveform to process.
        sampling_rate: Sampling rate of ``audio_data`` in Hz.
        model: Optional preloaded ASR model. When omitted, the
            ``facebook/s2t-small-librispeech-asr`` checkpoint is loaded.
        processor: Optional preloaded processor matching ``model``. When
            omitted, the processor of the same checkpoint is loaded.

    Returns:
        tuple: ``(cleaned_audio, speakers, transcription)`` as produced by
        ``remove_noise``, ``diarize_speakers`` and ``transcribe_audio``.
    """
    asr_checkpoint = "facebook/s2t-small-librispeech-asr"

    cleaned_audio = remove_noise(audio_data, sampling_rate)
    speakers = diarize_speakers(cleaned_audio, sampling_rate)

    # Loading a checkpoint is expensive; callers processing many clips should
    # load the model/processor once and pass them in.
    if model is None:
        model = Speech2TextForConditionalGeneration.from_pretrained(asr_checkpoint)
    if processor is None:
        processor = Speech2TextProcessor.from_pretrained(asr_checkpoint)

    transcription = transcribe_audio(cleaned_audio, sampling_rate, model, processor)
    return cleaned_audio, speakers, transcription

In [None]:
# NLP Model Training
def prepare_dataset():
    """Write the seed CSV of labeled utterances and load it as a dataset.

    The file ``taxi_safety_dataset.csv`` is (re)written in the current working
    directory on every call, then loaded through ``load_dataset``.

    Returns:
        tuple: ``(dataset, labels)`` where ``dataset`` is the object returned
        by ``load_dataset`` and ``labels`` maps label names to integer ids.
    """
    labels = dict(safe=0, threat=1, harassment=2, violence=3)
    csv_path = "taxi_safety_dataset.csv"

    # Header first, then one row per labeled example.
    rows = [
        ["text", "label"],
        ["Can you please take me to the airport?", labels["safe"]],
        ["I'll hurt you if you don't give me your money.", labels["threat"]],
        # Add more examples as needed
    ]

    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerows(rows)

    return load_dataset("csv", data_files=csv_path), labels

def train_nlp_models(dataset, labels):
    """Fine-tune ``bert-base-uncased`` as a sequence classifier.

    Args:
        dataset: Mapping of split name to dataset (as returned by
            ``prepare_dataset``) with ``text`` and ``label`` columns. A
            ``train`` split is required; a ``test`` split is optional.
        labels: Mapping of label name to integer id; its size sets the number
            of output classes.

    Returns:
        The ``Trainer`` after training, so the fine-tuned model
        (``trainer.model``) is available to the caller.
    """
    from transformers import DataCollatorWithPadding

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    tokenized_datasets = dataset.map(lambda examples: tokenizer(examples["text"], truncation=True), batched=True)

    # A dataset loaded from a single CSV only has a "train" split, so carve
    # out an evaluation split when none is provided.
    if "test" in tokenized_datasets:
        train_split = tokenized_datasets["train"]
        eval_split = tokenized_datasets["test"]
    elif len(tokenized_datasets["train"]) >= 2:
        split = tokenized_datasets["train"].train_test_split(test_size=0.2, seed=42)
        train_split, eval_split = split["train"], split["test"]
    else:
        # Too few rows to hold any out; train without evaluation.
        train_split, eval_split = tokenized_datasets["train"], None

    model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(labels))

    training_args = TrainingArguments(output_dir="./results", num_train_epochs=3, per_device_train_batch_size=16, per_device_eval_batch_size=16, warmup_steps=500, weight_decay=0.01, logging_dir="./logs", logging_steps=10)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_split,
        eval_dataset=eval_split,
        # Examples are tokenized without padding, so batches must be padded
        # dynamically to the longest sequence in each batch.
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    )
    trainer.train()

    # Fine-tune the model for NER and sentiment analysis
    # ... (code omitted for brevity)

    return trainer

def augment_data(dataset, augmenters=None, output_path="augmented_taxi_safety_dataset.csv"):
    """Augment labeled text and write the result to a CSV file.

    Every original text is kept, followed by the variants produced by each
    augmenter; all variants inherit the label of the text they came from.

    Args:
        dataset: Object with ``text`` and ``label`` columns. A mapping of
            splits (such as the result of ``prepare_dataset``) is also
            accepted, in which case its ``train`` split is used.
        augmenters: Optional iterable of objects exposing ``augment(text)``.
            Defaults to nlpaug synonym replacement, random swap and random
            deletion.
        output_path: Destination CSV path.

    Returns:
        list[tuple]: The ``(text, label)`` rows written to the file (header
        excluded).
    """
    # prepare_dataset() returns a mapping of splits, not the split itself.
    if isinstance(dataset, dict) and "text" not in dataset and "train" in dataset:
        dataset = dataset["train"]

    if augmenters is None:
        augmenters = [
            naw.SynonymAug(aug_src="wordnet"),
            naw.RandomWordAug(action="swap"),
            naw.RandomWordAug(action="delete"),
        ]
    else:
        augmenters = list(augmenters)

    augmented_dataset = []
    for text, label in zip(dataset["text"], dataset["label"]):
        augmented_texts = [text]
        for augmenter in augmenters:
            result = augmenter.augment(text)
            # Depending on the nlpaug version, augment() returns a string or a
            # list of strings; extending with a bare string would add single
            # characters.
            if isinstance(result, str):
                result = [result]
            augmented_texts.extend(result)

        augmented_dataset.extend((augmented_text, label) for augmented_text in augmented_texts)

    with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["text", "label"])
        writer.writerows(augmented_dataset)

    return augmented_dataset


In [None]:
# Real-time Inference
def real_time_inference(audio_stream, asr_model, processor, nlp_models, sampling_rate=16000):
    """Transcribe an audio chunk, analyze the text, and raise alerts.

    Args:
        audio_stream: Audio to analyze; passed straight to
            ``transcribe_audio``.
        asr_model: Speech recognition model passed to ``transcribe_audio``.
        processor: Processor matching ``asr_model``.
        nlp_models: Models handed to ``process_text``.
        sampling_rate: Sampling rate of ``audio_stream`` in Hz.

    Returns:
        The value returned by ``evaluate_threat`` (falsy when nothing was
        detected).

    Note:
        ``process_text``, ``evaluate_threat`` and ``notify_contacts`` are not
        defined in the visible part of this notebook and must be provided
        before this function is called.
    """
    transcribed_text = transcribe_audio(audio_stream, sampling_rate, asr_model, processor)

    # NLP processing
    keywords, entities, sentiment = process_text(transcribed_text, nlp_models)
    potential_threat = evaluate_threat(keywords, entities, sentiment)

    if potential_threat:
        # generate_alert takes (threat_level, details).
        # NOTE(review): assumes evaluate_threat returns the threat level
        # ("high"/"medium"/"low"); the transcript is used as the details.
        generate_alert(potential_threat, transcribed_text)
        notify_contacts(potential_threat)

    return potential_threat



In [None]:
# Alert Generation and Notification
def generate_alert(threat_level, details):
    """Log an alert whose severity matches the identified threat level.

    Args:
        threat_level: One of ``"high"``, ``"medium"`` or ``"low"``. Any other
            value is reported as an invalid threat level.
        details: Description of the incident, included in the log message.
    """
    import logging

    # Use a notebook-level `logger` when one has been defined; otherwise fall
    # back to a standard-library logger so the function never hits a NameError.
    alert_logger = globals().get("logger") or logging.getLogger(__name__)

    # Generate an alert based on the identified threat level and details
    if threat_level == "high":
        alert_logger.critical(f"High-level threat detected: {details}")
        # Trigger emergency response or escalation procedures
    elif threat_level == "medium":
        alert_logger.warning(f"Medium-level threat detected: {details}")
        # Notify designated contacts or monitoring station
    elif threat_level == "low":
        alert_logger.info(f"Low-level threat detected: {details}")
        # Log the incident for potential follow-up or analysis
    else:
        alert_logger.error(f"Invalid threat level: {threat_level}")

def send_email_alert(recipient, subject, body, smtp_user=None, smtp_password=None,
                     smtp_host="smtp.gmail.com", smtp_port=587):
    """Send a plain-text email alert over SMTP with STARTTLS.

    Args:
        recipient: Address placed in the ``To`` header.
        subject: Subject line.
        body: Plain-text message body.
        smtp_user: SMTP login name. Defaults to the ``ALERT_SMTP_USER``
            environment variable.
        smtp_password: SMTP password. Defaults to the ``ALERT_SMTP_PASSWORD``
            environment variable.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port.

    Raises:
        RuntimeError: If no credentials were passed and none are set in the
            environment. Raised before any network connection is attempted.
    """
    import os

    # Credentials are never hard-coded in the notebook; they come from the
    # caller or from the environment.
    smtp_user = smtp_user or os.environ.get("ALERT_SMTP_USER")
    smtp_password = smtp_password or os.environ.get("ALERT_SMTP_PASSWORD")
    if not smtp_user or not smtp_password:
        raise RuntimeError(
            "SMTP credentials are not configured; set ALERT_SMTP_USER and "
            "ALERT_SMTP_PASSWORD or pass smtp_user/smtp_password."
        )

    # Send an email alert to the specified recipient
    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = "alerts@taxisafety.com"
    msg["To"] = recipient

    with smtplib.SMTP(smtp_host, smtp_port) as smtp:
        smtp.starttls()
        smtp.login(smtp_user, smtp_password)
        smtp.send_message(msg)

def send_sms_alert(phone_number, message):
    """Placeholder for sending an SMS alert; currently does nothing.

    Intended to deliver ``message`` to ``phone_number`` through a third-party
    SMS service API (e.g., Twilio, Plivo, etc.) once implemented.

    Returns:
        None
    """
    return None


In [None]:
# Continuous Learning and Improvement
def update_dataset(audio_stream, transcribed_text, potential_threat):
    """Placeholder for storing incident data for continuous learning.

    Not implemented yet: the arguments are accepted and ignored.

    Returns:
        None
    """
    return None

def retrain_models():
    """Placeholder for periodic retraining of the NLP models.

    Intended to retrain and fine-tune the models on the updated labeled data;
    not implemented yet.

    Returns:
        None
    """
    return None

# Deployment and Integration
# Package the entire system as a containerized application or a microservice architecture
# Integrate with taxi dispatch systems, location tracking, and other relevant services



In [None]:
# User Interface or Dashboard
app = Flask(__name__)

# Database setup
DATABASE_PATH = "alerts.db"

# `conn` / `c` are kept as notebook-level handles for schema creation and
# interactive use. Request handlers below do NOT use them: sqlite3 connections
# may only be used from the thread that created them, and the Flask server
# handles requests on other threads.
conn = sqlite3.connect(DATABASE_PATH)
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS alerts
             (id INTEGER PRIMARY KEY AUTOINCREMENT, threat_level TEXT, details TEXT, timestamp TEXT)''')
conn.commit()


def _open_request_connection():
    """Open a fresh SQLite connection for the current request."""
    return sqlite3.connect(DATABASE_PATH)


@app.route("/")
def index():
    """Render the dashboard listing every stored alert."""
    request_conn = _open_request_connection()
    try:
        alerts = request_conn.execute("SELECT * FROM alerts").fetchall()
    finally:
        request_conn.close()
    return render_template("index.html", alerts=alerts)

@app.route("/create_alert", methods=["POST"])
def create_alert():
    """Store an alert posted as form data.

    Expects the form fields ``threat_level``, ``details`` and ``timestamp``.
    """
    threat_level = request.form["threat_level"]
    details = request.form["details"]
    timestamp = request.form["timestamp"]
    request_conn = _open_request_connection()
    try:
        # The connection context manager commits on success and rolls back if
        # the insert raises; values are bound as parameters, never formatted
        # into the SQL string.
        with request_conn:
            request_conn.execute(
                "INSERT INTO alerts (threat_level, details, timestamp) VALUES (?, ?, ?)",
                (threat_level, details, timestamp),
            )
    finally:
        request_conn.close()
    return "Alert created successfully"

if __name__ == "__main__":
    # Debug mode is off: the interactive debugger allows arbitrary code
    # execution from the browser, and the reloader does not work from inside a
    # notebook kernel.
    app.run(debug=False)



In [None]:
# RNN Models
class LSTMModel(nn.Module):
    """LSTM sequence classifier: stacked LSTM followed by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``
                (``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` computed from the LSTM
            output at the last time step.
        """
        # Build the zero initial states from `x` so they share its device and
        # dtype; plain torch.zeros would always be CPU/float32 and fail when
        # the model runs on GPU or in another precision.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

class GRUModel(nn.Module):
    """GRU sequence classifier: stacked GRU followed by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``
                (``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` computed from the GRU
            output at the last time step.
        """
        # Build the zero initial state from `x` so it shares its device and
        # dtype; plain torch.zeros would always be CPU/float32 and fail when
        # the model runs on GPU or in another precision.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out