In [None]:
import math
import random

import numpy as np
import pandas as pd
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer

In [None]:
def generate_pop(pop_num):
  """Generate a new random population.

  Each individual is a 4-tuple of weights. The first weight is drawn from
  [0, 5) and the remaining three from (-5, 0].

  Args:
    pop_num: the size of the population.

  Returns:
    A list of `pop_num` individuals (empty when `pop_num` is 0).
  """
  # `random` is the module, so the generator function is `random.random()`.
  return [
      (random.random() * 5, random.random() * -5, random.random() * -5, random.random() * -5)
      for _ in range(pop_num)
  ]

In [None]:
def crossover(parent1, parent2, prob=0.7, parent_pref=0.6):
  """Blend two parents into two children (arithmetic crossover).

  With probability `prob` the parents are blended gene by gene; otherwise
  they are returned unchanged.

  Args:
    parent1: first parent, a sequence of numeric genes.
    parent2: second parent, a sequence of numeric genes.
    prob: probability that crossover happens.
    parent_pref: share of the "own" parent in each child; child1 takes
      `parent_pref` of parent1, child2 takes `parent_pref` of parent2.

  Returns:
    A pair `(child1, child2)` of tuples, or `(parent1, parent2)` untouched
    when no crossover happens.
  """
  # Draw a number in [0, 1); comparing the `random` module itself would raise.
  if random.random() > prob:
    return parent1, parent2

  other_share = 1 - parent_pref
  child1 = tuple(a * parent_pref + b * other_share for a, b in zip(parent1, parent2))
  child2 = tuple(a * other_share + b * parent_pref for a, b in zip(parent1, parent2))

  return child1, child2

In [None]:
def mutation(individual, prob=0.01):
  """Mutate one individual by re-drawing a single randomly chosen gene.

  With probability `prob` one gene is replaced: gene 0 is re-drawn from
  [0, 5), any other gene from (-5, 0]. Otherwise the individual is returned
  unchanged.

  Args:
    individual: sequence of numeric genes (not modified).
    prob: probability that a mutation happens.

  Returns:
    The original `individual` when no mutation happens, otherwise a new
    tuple with one gene replaced.
  """
  # Draw a number in [0, 1); comparing the `random` module itself would raise.
  if random.random() > prob:
    return individual

  # Individuals are tuples (immutable), so mutate a copy and return it.
  genes = list(individual)
  random_param = random.randrange(len(genes))
  genes[random_param] = random.random() * 5 if random_param == 0 else random.random() * -5

  return tuple(genes)

In [None]:
point = (0, 0)  # center of requests, stored as (x, y) = (longitude, latitude)

def distance_measure(request):
  """Return the straight-line distance from a request's end point to `point`.

  Args:
    request: object exposing `end_longitude` and `end_latitude`.

  Returns:
    The Euclidean distance, in the same units as the coordinates.
  """
  # `point` is a plain tuple, so unpack it instead of reading .x / .y.
  origin_lon, origin_lat = point
  return math.hypot(request.end_longitude - origin_lon, request.end_latitude - origin_lat)

def get_score(driver, request):
  """Score a request for a driver as a weighted sum of its features.

  Args:
    driver: the four weights, either as an object with `x`, `y`, `z`, `w`
      attributes or as a plain 4-element sequence in that order.
    request: object exposing `money`, `time`, `distance` and the fields
      read by `distance_measure`.

  Returns:
    money*x + time*y + distance*z + distance_measure(request)*w.
  """
  try:
    money_w, time_w, distance_w, destination_w = driver.x, driver.y, driver.z, driver.w
  except AttributeError:
    # Individuals built by generate_pop() are plain 4-tuples without named fields.
    money_w, time_w, distance_w, destination_w = driver

  return (money_w * request.money
          + time_w * request.time
          + distance_w * request.distance
          + destination_w * distance_measure(request))

def get_best(driver, requests, cur_ind, nxt_ind):
  """Return the index of the highest-scoring request in [cur_ind, nxt_ind).

  Args:
    driver: driver weights, passed through to `get_score`.
    requests: indexable collection of requests.
    cur_ind: first index to consider (always evaluated).
    nxt_ind: end of the window, exclusive.

  Returns:
    Index of the best request; ties keep the earliest index. When the
    window holds no index after `cur_ind`, `cur_ind` is returned.
  """
  best_score = get_score(driver, requests[cur_ind])
  best_ind = cur_ind
  for i in range(cur_ind + 1, nxt_ind):
    cur_score = get_score(driver, requests[i])
    if cur_score > best_score:
      # Track the running maximum; without this every later request would
      # only be compared against the first one.
      best_score = cur_score
      best_ind = i

  return best_ind

x = 0.5  # default minimum score a request must exceed to be accepted

def iteration(driver, requests, threshold=None):
  """Compute the money gained by the driver for the expected requests.

  Requests are visited in the given order. A request is skipped while the
  driver is still busy (its `start_time` is before the end of the last
  accepted request); otherwise it is accepted when its score exceeds the
  threshold.

  Args:
    driver: driver weights, passed through to `get_score`.
    requests: iterable of requests exposing `start_time`, `duration` and
      `money`. Presumably ordered by `start_time` -- TODO confirm.
    threshold: minimum score for accepting a request; defaults to the
      module-level `x`.

  Returns:
    Total `money` of all accepted requests.
  """
  if threshold is None:
    threshold = x

  money = 0
  current_time = 0
  for request in requests:
    if request.start_time < current_time:
      continue

    if get_score(driver, request) > threshold:
      current_time = request.start_time + request.duration
      # Accumulate what the ride pays, not its score: the score is only
      # the accept/reject criterion.
      money += request.money

  return money

In [None]:
def train(get_best, price, time, pick_up_distance, destination_score, requests, iter=100):
  """Find good driver parameters with a genetic algorithm.

  A population of 100 individuals is evolved for `iter` generations. The
  fitness of an individual is `iteration(individual, requests)`; parents are
  drawn with fitness-proportional (roulette-wheel) selection, then combined
  with `crossover` and `mutation`.

  Args:
    get_best: currently unused.
    price: currently unused.
    time: currently unused.
    pick_up_distance: currently unused.
    destination_score: currently unused.
    requests: requests throughout the day, passed to `iteration`.
    iter: number of generations.

  Returns:
    The best individual seen in any evaluated generation (the first
    individual of the initial population when `iter` is 0).
  """
  population = generate_pop(100)

  best_one = population[0]
  best_earning = None
  for _generation in range(iter):
    earnings = [iteration(individual, requests) for individual in population]

    # Remember the best individual across generations, so a later, worse
    # generation cannot replace it.
    best_index = earnings.index(max(earnings))
    if best_earning is None or earnings[best_index] > best_earning:
      best_earning = earnings[best_index]
      best_one = population[best_index]

    # random.choices needs non-negative weights with a positive total.
    # Shift negative earnings up, and fall back to uniform selection when
    # every individual earned the same.
    lowest = min(earnings)
    weights = [earning - lowest for earning in earnings] if lowest < 0 else earnings
    if sum(weights) <= 0:
      weights = None

    new_population = []
    for _pair in range(len(population) // 2):
      parents = random.choices(population, weights=weights, k=2)

      child1, child2 = crossover(parents[0], parents[1])
      new_population.append(mutation(child1))
      new_population.append(mutation(child2))

    # Promote the offspring; otherwise every generation re-evaluates the
    # initial population and nothing evolves.
    population = new_population

  return best_one

In [None]:
# Fit a Gaussian-copula synthesizer on the trip data and sample synthetic trips.
data = pd.read_csv('data/uber_hackathon_v2_mock_data.xlsx - rides_trips.csv')
columns = ['distance_km', 'duration_mins', 'net_earnings']
data = data[columns]

# The synthesizer requires a metadata object describing the table
# (constructing it without one raises TypeError); infer it from the frame.
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data)

model = GaussianCopulaSynthesizer(metadata)
model.fit(data)
synthetic_data = model.sample(20)



# Sample pickup locations from a 2-D normal distribution around the center.
center_lat, center_lon = 0, 0
std_km = 3  # standard deviation of the pickup distance from the center, in km
std_deg = std_km / 111  # km -> degrees of latitude (about 111 km per degree)

std_lat = std_deg
# A degree of longitude spans about 111 * cos(latitude) km, so the same
# spread in km needs more degrees of longitude away from the equator.
# Deriving the factor from center_lat keeps it valid when the center moves.
std_lon = std_deg / math.cos(math.radians(center_lat))

n_points = 20  # number of trips

# Diagonal covariance: latitude and longitude are sampled independently.
cov_matrix = [[std_lat**2, 0], [0, std_lon**2]]

coords = np.random.multivariate_normal([center_lat, center_lon], cov_matrix, size=n_points)

df_coords = pd.DataFrame(coords, columns=['pickup_lat', 'pickup_lon'])

def move_point_flat(lat, lon, distance_km):
    """
    Shift (lat, lon) by distance_km along a uniformly random heading,
    using a flat-earth approximation (curvature is ignored).

    Returns the new (lat, lon) pair in degrees.
    """
    km_per_degree = 111

    # Heading in radians, drawn from [0, 2*pi]
    heading = random.uniform(0, 2*math.pi)

    # Split the displacement into its two components (still in km)
    along_lat_km = distance_km * math.cos(heading)
    along_lon_km = distance_km * math.sin(heading)

    # A degree of longitude shrinks with the cosine of the latitude
    lon_km_per_degree = km_per_degree * math.cos(math.radians(lat))

    return lat + along_lat_km / km_per_degree, lon + along_lon_km / lon_km_per_degree

def _random_dropoff(row):
    """Return a drop-off (lat, lon) Series 0-10 km away from the row's pickup point."""
    trip_km = random.uniform(0, 10)
    return pd.Series(move_point_flat(row['pickup_lat'], row['pickup_lon'], trip_km))

# One drop-off point per pickup, computed row by row.
df_coords[['drop_lat', 'drop_lon']] = df_coords.apply(_random_dropoff, axis=1)



# Combine the coordinates with the synthetic trip features, column-wise.
generated_requests = pd.concat([df_coords, synthetic_data], axis=1)

# One start time per request; derived from the frame so the column
# assignment below always matches the number of rows.
n = len(generated_requests)
avg_gap = 30
max_jitter = 15  # tweak noise range if desired
total_span = (n - 1) * avg_gap

# Base values equally spaced
values = np.linspace(0, total_span, n)

# Add small random noise to each start time
noise = np.random.uniform(-max_jitter, max_jitter, n)
values = values + noise

# Keep start times non-negative. The upper bound is 600 as before, and
# only grows when more requests than fit into that span are generated.
values = np.clip(values, 0, max(600, total_span + max_jitter))

# Sort to ensure increasing order
values = np.sort(values)


generated_requests['start_time'] = values

print(generated_requests)

TypeError: GaussianCopulaSynthesizer.__init__() missing 1 required positional argument: 'metadata'