In [None]:
import os
from calendar import day_name
from datetime import datetime, timedelta, date

import numpy as np
import pandas as pd

In [None]:
import spacy

In [None]:
from requests import get
from sqlalchemy import create_engine

In [None]:
# Database connection settings.
database = "ditravo"
host = "dev-pg.ditravo.com"
user = "postgres"
# The password is read from the environment so it never has to be stored in the
# notebook; the empty-string default matches the previous hard-coded value.
password = os.environ.get("DITRAVO_DB_PASSWORD", "")
port = "5432"
table = 'inventory.passenger_type'

# __init__.py
# CreateApp
- create_extensions
- create_swagger_ui
- create_configuration
- create_app

# utils.py
# Process
- process_missing_destination ???
- process_stations (if length = 1 --> get default location(GPS))
- process_year (year of the date 2022 --> 2023)
- process_days
- process_dates (if length = 1 --> departure only (Gidiş); if length = 0 --> tomorrow)
- process_passengers

# Format
- format_response_dictionary
- format_ner_data
- format_entities

# Assemble
- assemble_link

# Apply(Assemble, Process)
- apply_ngram_model()
- apply_ner_model()
- apply_lemmatization_model
- apply()

# service.py
# Extract(Apply, Request)
- extract_stations
- extract_date_information
- extract_passengers_information
- extract

# data
# dao.py
# Save(Format)
- save_entities()
- save_into_database()

# request.py
# Request
- request_destination_information

In [None]:
# Load the pipeline named 'en_core_web_sm'; it is bound to the global `nlp`
# that Apply.apply_ner_model reads.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Sample English text for trying out the NER pipeline.
# Adjacent string literals are concatenated by the parser, so the source
# indentation no longer ends up inside the text (a backslash continuation
# inside a single literal keeps the leading spaces of the next line).
text = (
    'In September 14, Apple, Google, and Microsoft plan to expand their support for the passwordless sign-on standard '
    'created for common use by the FIDO Alliance and the World Wide Web Consortium (W3C) in October 19, as part of '
    'their work together to make the web more secure and convenient. announced. With the new feature, '
    'websites and apps will be able to provide consistent, secure and easy password-free logins to '
    '150 consumers across different devices and platforms.'
)

In [None]:
# Display the sample text
text

In [None]:
# Run the loaded pipeline on the sample text
document = nlp(text)

In [None]:
# Display the components of the loaded pipeline
nlp.pipeline

In [None]:
# Print every recognised entity next to its label
for entity in document.ents:
    print(entity.text, entity.label_)

In [None]:
class Apply():
    """Applies the module-level ``nlp`` pipeline to raw text."""

    def apply_ner_model(self, text):
        """
        Process a text with the global ``nlp`` pipeline.

        parameters
        ----------
        text: str
            Raw text to be processed

        returns
        -------
        The object returned by ``nlp(text)``. ``nlp`` is looked up when the
        method is called, so rebinding the global changes the model in use.
        """
        return nlp(text)


apply = Apply()

In [None]:
# Same processing as calling `nlp(text)` directly, routed through the Apply helper
document = apply.apply_ner_model(text)

In [None]:
class Request():
    """HTTP access to the station service."""

    # Default endpoint that returns the station list as JSON
    STATIONS_URL = 'http://dev-rail.ditravo.com:8100/stations/yhtStations'

    def request_stations(self, url=None, timeout=10):
        """
        Fetch the station list from the station service.

        parameters
        ----------
        url: str, optional
            Endpoint to query; ``STATIONS_URL`` is used when omitted

        timeout: float, optional
            Seconds to wait for the service before giving up. Without a
            timeout the call could block the notebook indefinitely.

        returns
        -------
        The decoded JSON body of the response

        raises
        ------
        requests.HTTPError
            If the service answers with an error status code
        """
        response = get(url or self.STATIONS_URL, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON
        response.raise_for_status()
        return response.json()

request = Request()
stations = request.request_stations()


In [None]:
class Format():
    """Helpers that reshape raw station and passenger data into lookup dictionaries."""

    def format_stations(self, stations):
        """
        Build a lookup from station name to station code.

        parameters
        ----------
        stations: iterable of dict
            Each item is read through its 'name' and 'IATACode' keys

        returns
        -------
        dict mapping each 'name' value to the matching 'IATACode' value
        """
        codes_by_name = {}
        for station in stations:
            name = station['name']
            codes_by_name[name] = station['IATACode']
        return codes_by_name

    def format_passenger_types(self, passenger_type_df):
        """
        Build a lookup from lower-cased passenger type name to its code.

        parameters
        ----------
        passenger_type_df:
            Object exposing ``name`` (with a ``.str`` accessor) and ``code``
            attributes, e.g. a pandas DataFrame with those two columns

        returns
        -------
        dict mapping each lower-cased name to the code in the same position
        """
        lowered_names = passenger_type_df.name.str.lower()
        codes = passenger_type_df.code
        return {name: code for name, code in zip(lowered_names, codes)}

    def format_response_dictionary(self, entities):
        """Placeholder that is not implemented yet; always returns None."""
        return None
    
format_ = Format()
# Build the name -> code lookup from the fetched stations. With this assignment
# commented out, `station_codes` was read before anything defined it, which
# raises NameError on a fresh kernel.
station_codes = format_.format_stations(stations)
station_codes

In [None]:
# Station names, i.e. the keys of the lookup (iterating a dict yields its keys)
list(station_codes)

In [None]:
# Static station name -> station code lookup
station_codes = {
    "Söğütlüçeşme": "SGTC",
    "Pendik": "PNDK",
    "Eskişehir": "ESKR",
    "Polatlı": "PLTL",
    "Ankara": "ANKR",
}

In [None]:
# Sample passenger keyword that is looked up in `passenger_types` further down
passenger = 'çocuk'

In [None]:
# Passenger type code -> Turkish keywords that identify that passenger type
passenger_types = {
    "ADT": ["yolcu", "yetişkin", "kişi"],
    "CHD": ["çocuk"],
    "PET": ["hayvan"],
    "YNG": ["öğrenci", "genç"],
    "TCH": ["öğretmen", "öğretim görevlisi"],
    "PRS": ["basın", "muhabir"],
    "MLT": ["asker"],
    "STF": ["personel", "çalışan"],
    "60Y": ["altmış yaş"],
    "65Y": ["altmış beş yaş"],
}

In [None]:
# Collect every type code whose keyword list contains the passenger keyword,
# then keep the first one (IndexError if no type matches).
matching_codes = [
    code
    for code, keywords in passenger_types.items()
    if passenger in keywords
]
passenger_code = matching_codes[0]
passenger_code

In [23]:
# Sample default location. Note that Extract.extract reads the location from
# request['defaultLocation'] rather than from this global.
default_location = 'İstanbul'

In [58]:
class Extract(Apply, Format): # Process, Assemble, Format
    """
    Extract station, date and passenger information from a request text.

    The text is processed through ``apply_ner_model`` and the findings are
    written into ``self.response_content``, which ``extract`` returns.
    """

    # Entity labels that are collected, in the order in which their buckets
    # appear in the list returned by ``extract_entities``.
    _ENTITY_LABELS = ('DURAK', 'YOLCU', 'SAYI', 'AY', 'GÜN')

    def __init__(self):

        self.response_content = self._build_empty_response()

        # Station data comes from the module-level `station_codes` lookup;
        # loading it from the yhtStations service is currently disabled.
        self.station_codes = station_codes
        self.station_names = list(self.station_codes.keys())

    @staticmethod
    def _build_empty_response():
        """Return a fresh response skeleton: both station prompts and an empty url."""
        return {
            'exampleDeparture': {
                "message": "Lütfen binmek istediğiniz durağı belirtin.",
                "stationList": [""],
            },
            'exampleArrival': {
                "message": "Lütfen inmek istediğiniz durağı belirtin.",
                "stationList": [""],
            },
            'url': "",
        }

    def extract_entities(self, document):
        """
        Extract required entities like station, passenger or date information

        parameters
        ----------
        document:
            A spacy object that could be used to retrieve entities from

        returns
        -------
        list of lists
            ``[stations, passengers, number, month, weekdays]``; each inner list
            holds the text of the entities labelled 'DURAK', 'YOLCU', 'SAYI',
            'AY' and 'GÜN' respectively. Entities with other labels are ignored.
        """
        buckets = {label: [] for label in self._ENTITY_LABELS}

        for entity in document.ents:
            bucket = buckets.get(entity.label_)
            if bucket is not None:
                bucket.append(str(entity))

        return [buckets[label] for label in self._ENTITY_LABELS]

    def extract_stations(self, entities, default_location):
        """
        Extract station information and add into the dictionary

        parameters
        ----------
        entities: list of lists
            Important information that is received from the input text

        default_location: str
            GPS location of the user that is received during the request. It is utilized if user indicates a single station
        """
        # Stations are the first bucket returned by extract_entities
        stations = entities[0]

        if len(stations) == 2:

            # First station is the departure, second one the arrival
            departure, arrival = stations

            self.response_content['Kalkış'] = departure
            self.response_content['Varış'] = arrival

        elif len(stations) == 1:

            # A single station is treated as the arrival; departure falls back to default_location
            self.response_content['Kalkış'] = default_location
            self.response_content['Varış'] = stations[0]

        elif len(stations) == 0:

            # Offer the known stations if the user did not mention any.
            # Copies are stored so that changes to the response cannot alter self.station_names.
            self.response_content['exampleDeparture']["stationList"] = list(self.station_names)
            self.response_content['exampleArrival']["stationList"] = list(self.station_names)

        # More than two stations: the response is left unchanged.

    def extract_dates(self, entities):
        """
        Extract date information and add into the dictionary

        Only the month ('AY') and weekday ('GÜN') buckets are read; the number
        bucket is not combined with them here. Weekday entities are applied
        after the month entities and therefore overwrite them.

        parameters
        ----------
        entities: list of lists
            Important information that is received from the input text
        """
        dates = entities[3]
        weekdays = entities[4]

        if len(dates) == 2:

            departure_date, returning_date = dates

            self.response_content['KalkışTarihi'] = departure_date
            self.response_content['DönüşTarihi'] = returning_date

        elif len(dates) == 1:

            self.response_content['KalkışTarihi'] = dates[0]
            self.response_content['DönüşTarihi'] = ''

        elif len(dates) == 0:

            # Default to tomorrow. NOTE: this value is a datetime object,
            # whereas every other branch stores the entity text (str).
            self.response_content['KalkışTarihi'] = datetime.today() + timedelta(days = 1)
            self.response_content['DönüşTarihi'] = ''

        if len(weekdays) == 2:

            departure_date, returning_date = weekdays

            self.response_content['KalkışTarihi'] = departure_date
            self.response_content['DönüşTarihi'] = returning_date

        elif len(weekdays) == 1:

            self.response_content['KalkışTarihi'] = weekdays[0]
            self.response_content['DönüşTarihi'] = ''

    def extract_passengers(self, entities):
        """
        Extract passenger information and add into the dictionary

        Not implemented yet: nothing is written to the response.

        parameters
        ----------
        entities: list of lists
            Important information that is received from the input text
        """
        # Passengers are the second bucket returned by extract_entities
        # (index 2 holds the numbers, not the passengers).
        passengers = entities[1]
        # TODO: translate `passengers` into passenger type codes and add them to the response

    def extract(self, request):
        """
        Aggregate all necessary functions into a single function and receive the request

        parameters
        ----------
        request: dictionary
            Parsed JSON request; its 'defaultLocation' and 'text' keys are read

        returns
        -------
        dict
            The response content built for this request
        """
        default_location = request['defaultLocation']
        text = request['text']

        # Start from a clean skeleton so that values written for an earlier
        # request on the same instance do not leak into this response.
        self.response_content = self._build_empty_response()

        document = self.apply_ner_model(text)

        entities = self.extract_entities(document)

        self.extract_stations(entities, default_location)
        self.extract_dates(entities)
        self.extract_passengers(entities)

        # TODO: post-process the response (process_response) and build the link (assemble_link)
        return self.response_content


In [59]:
# Rebind the global `nlp` to the pipeline stored under ./model-best;
# Apply.apply_ner_model picks it up because it reads `nlp` at call time.
nlp = spacy.load('./model-best')

In [63]:
# Sample request text (replaces the earlier English sample bound to `text`)
text = '30 Haziran Ankara trenine 2 bilet'

In [64]:
# Request payload with the two keys that Extract.extract reads
request_ = {
    "defaultLocation": "SGTC",
    "text": text,
}

In [65]:
# Build the extractor and run the whole flow on the sample request;
# the returned response dictionary is displayed as the cell output.
extract = Extract()
extract.extract(request_)

{'exampleDeparture': {'message': 'Lütfen binmek istediğiniz durağı belirtin.',
  'stationList': ['']},
 'exampleArrival': {'message': 'Lütfen inmek istediğiniz durağı belirtin.',
  'stationList': ['']},
 'url': '',
 'Kalkış': 'SGTC',
 'Varış': 'Ankara',
 'KalkışTarihi': 'Haziran',
 'DönüşTarihi': ''}

In [None]:
# Turkish weekday names starting with Monday, which lines up with
# date.weekday() where Monday is 0.
days = [
    'pazartesi',
    'salı',
    'çarşamba',
    'perşembe',
    'cuma',
    'cumartesi',
    'pazar',
]

day_index = date.today().weekday()
days[day_index]

In [None]:
# extract_entities requires the processed document; calling it without an
# argument raises TypeError. Process the current sample text first.
extract.extract_entities(extract.apply_ner_model(text))