In [1]:
import os
import random

import numpy as np
import openpyxl
import pandas as pd
from joblib import Parallel, delayed
from tqdm import trange

from cloudmanufacturing.data import read_fatahi_dataset
from cloudmanufacturing.data_generation import create_excel_table
from cloudmanufacturing.solvers.mip_solver import mip_solve
from cloudmanufacturing.validation import construct_delta, objvalue
# Seed Python's global `random` generator so the problem sizes drawn with
# `random.randint` in the data-generation cell are reproducible on a fresh
# kernel. NOTE(review): this seeds only the stdlib `random` module, not NumPy;
# whether `create_excel_table` draws from another generator is not visible here.
random.seed(42)

  from .autonotebook import tqdm as notebook_tqdm


#### Data generation

In [2]:
# Number of synthetic problems to generate and the workbook they are written to.
N = 500
file_path = '../data/train_data.xlsx'

# Draw N random size triples (n_operations, n_suboperations, n_cities) and
# order them ascending, so the problems are written from smallest to largest.
trios = sorted(
    (random.randint(5, 15), random.randint(5, 20), random.randint(5, 15))
    for _ in range(N)
)

# Start from a workbook without the default 'Sheet' worksheet.
wb = openpyxl.Workbook()
del wb['Sheet']

# `create_excel_table` returns the workbook it was given; presumably it adds
# one problem description per call (TODO confirm against data_generation).
for n_operations, n_suboperations, n_cities in trios:
    wb = create_excel_table(
        wb, n_operations, n_suboperations, n_cities, n_problem=1
    )

wb.save(file_path)

#### Fatahi dataset

In [11]:
# Load the Fatahi problems from the Excel workbook. The result is used below
# as a sequence: it supports len() and integer indexing, and each item is
# accessed like a dict (e.g. problem['n_operations']).
dataset = read_fatahi_dataset("../data/fatahi.xlsx")

100%|██████████| 18/18 [00:00<00:00, 247.84it/s]


In [12]:
def fatahi_problem(i, problem):
    """Solve one Fatahi problem with the MIP solver and store its solution.

    When the solver reports OPTIMAL or FEASIBLE, ``delta`` and ``gamma`` are
    written to ``./fatahi_solutions/{delta,gamma}_{i}_{op|fb}.npy``, where
    ``op`` marks an optimal and ``fb`` a merely feasible solution. The output
    directory is created if it does not exist yet.

    Parameters
    ----------
    i : int
        Index of the problem; used in the file names and in the summary.
    problem : mapping
        Problem description as produced by ``read_fatahi_dataset``. It must
        provide the keys 'n_operations', 'n_suboperations' and 'n_cities'.

    Returns
    -------
    dict or None
        ``{'i', 'value', 'objvalue', 'status'}`` for a solved problem, where
        ``value`` is the fourth item returned by ``mip_solve`` (presumably the
        solver's objective value -- TODO confirm) and ``objvalue`` is
        recomputed with ``objvalue(problem, gamma, delta)``. ``None`` when the
        status is neither OPTIMAL nor FEASIBLE.
    """
    # Problem size, printed for progress/debugging.
    print(problem['n_operations'])
    print(problem['n_suboperations'])
    print(problem['n_cities'])
    delta, gamma, status, value = mip_solve(problem)

    # Both accepted statuses are handled identically except for the suffix.
    suffix = {'OPTIMAL': 'op', 'FEASIBLE': 'fb'}.get(status.name)
    if suffix is None:
        print(i," wasn't solved")
        return None

    out_dir = './fatahi_solutions'
    # np.save does not create missing directories; exist_ok keeps this safe
    # when several parallel workers reach this line at the same time.
    os.makedirs(out_dir, exist_ok=True)
    np.save(f'{out_dir}/delta_{i}_{suffix}.npy', delta)
    np.save(f'{out_dir}/gamma_{i}_{suffix}.npy', gamma)
    return {'i': i,
            'value': value,
            'objvalue': objvalue(problem, gamma, delta),
            'status': status.name}

In [13]:
# Queue one solver task per Fatahi problem; n_jobs=-1 lets joblib use all
# available CPU cores. NOTE(review): the progress bar wraps the task
# generator, so it appears to track task dispatch rather than completion.
fatahi_jobs = (
    delayed(fatahi_problem)(idx, dataset[idx]) for idx in trange(len(dataset))
)
fatahi_results = Parallel(n_jobs=-1)(fatahi_jobs)

# Keep only the problems for which a solution summary was returned.
sum_info = [entry for entry in fatahi_results if entry is not None]

100%|██████████| 18/18 [00:00<?, ?it/s]


#### Train dataset

In [5]:
# Load the generated training problems from ../data/train_data.xlsx (the file
# written by the data-generation cell), using the same reader as for the
# Fatahi workbook. In the recorded run this took about four minutes for 500
# entries.
dataset_train = read_fatahi_dataset("../data/train_data.xlsx")

100%|██████████| 500/500 [04:11<00:00,  1.99it/s]


In [6]:
def train_problem(i, problem):
    """Solve one training problem with the MIP solver and store its solution.

    When the solver reports OPTIMAL or FEASIBLE, ``delta`` and ``gamma`` are
    written to ``./train_solutions/{delta,gamma}_{i}_{op|fb}.npy``, where
    ``op`` marks an optimal and ``fb`` a merely feasible solution. The output
    directory is created if it does not exist yet.

    Parameters
    ----------
    i : int
        Index of the problem; used in the file names and in the summary.
    problem : object
        Problem description passed unchanged to ``mip_solve`` and
        ``objvalue``.

    Returns
    -------
    dict or None
        ``{'i', 'value', 'objvalue', 'status'}`` for a solved problem, where
        ``value`` is the fourth item returned by ``mip_solve`` (presumably the
        solver's objective value -- TODO confirm) and ``objvalue`` is
        recomputed with ``objvalue(problem, gamma, delta)``. ``None`` when the
        status is neither OPTIMAL nor FEASIBLE.
    """
    delta, gamma, status, value = mip_solve(problem)

    # Both accepted statuses are handled identically except for the suffix.
    suffix = {'OPTIMAL': 'op', 'FEASIBLE': 'fb'}.get(status.name)
    if suffix is None:
        print(i," wasn't solved")
        return None

    out_dir = './train_solutions'
    # np.save does not create missing directories; exist_ok keeps this safe
    # when several parallel workers reach this line at the same time.
    os.makedirs(out_dir, exist_ok=True)
    np.save(f'{out_dir}/delta_{i}_{suffix}.npy', delta)
    np.save(f'{out_dir}/gamma_{i}_{suffix}.npy', gamma)
    return {'i': i,
            'value': value,
            'objvalue': objvalue(problem, gamma, delta),
            'status': status.name}

In [7]:
# Queue one solver task per training problem; n_jobs=-1 lets joblib use all
# available CPU cores. The recorded run of this cell took roughly 1.5 hours.
train_jobs = (
    delayed(train_problem)(idx, dataset_train[idx])
    for idx in trange(len(dataset_train))
)
train_results = Parallel(n_jobs=-1)(train_jobs)

# Keep only the problems for which a solution summary was returned.
sum_info_train = [entry for entry in train_results if entry is not None]

100%|██████████| 500/500 [1:30:57<00:00, 10.91s/it]


In [16]:
from collections import defaultdict

def generate_substrings(s):
    """Return the set of all distinct, non-empty contiguous substrings of ``s``.

    Every slice ``s[start:stop]`` with ``0 <= start < stop <= len(s)`` is
    included once; an empty ``s`` yields an empty set.
    """
    return {
        s[start:stop]
        for start in range(len(s))
        for stop in range(start + 1, len(s) + 1)
    }


def most_common_substring(strings):
    """Find the substring that occurs in the largest number of ``strings``.

    A substring is counted at most once per input string, so the count is the
    number of strings containing it, not the total number of occurrences.

    Ties are broken deterministically: among equally frequent substrings the
    longest wins, and among those the lexicographically smallest. Without an
    explicit rule the winner would depend on set/dict iteration order, which
    varies between interpreter runs because of string hash randomisation.

    Parameters
    ----------
    strings : iterable of str
        Strings to analyse.

    Returns
    -------
    tuple
        ``(substring, count)``.

    Raises
    ------
    ValueError
        If ``strings`` is empty or contains only empty strings, so there is
        no substring to report.
    """
    substring_count = defaultdict(int)
    for s in strings:
        for substring in generate_substrings(s):
            substring_count[substring] += 1

    if not substring_count:
        raise ValueError(
            "most_common_substring() needs at least one non-empty string"
        )

    # Highest count first, then longest, then alphabetical order.
    most_common = min(
        substring_count,
        key=lambda sub: (-substring_count[sub], -len(sub), sub),
    )
    return most_common, substring_count[most_common]

# Example usage: three words sharing a common prefix.
strings = ["apple", "appeal", "application"]
result = most_common_substring(strings)
# Prints a (substring, count) tuple, where count is the number of input
# strings that contain the substring.
print(result)


a
ap
app
appl
apple
p
pp
ppl
pple
p
pl
ple
l
le
e
a
ap
app
appe
appea
appeal
p
pp
ppe
ppea
ppeal
p
pe
pea
peal
e
ea
eal
a
al
l
a
ap
app
appl
appli
applic
applica
applicat
applicati
applicatio
application
p
pp
ppl
ppli
pplic
pplica
pplicat
pplicati
pplicatio
pplication
p
pl
pli
plic
plica
plicat
plicati
plicatio
plication
l
li
lic
lica
licat
licati
licatio
lication
i
ic
ica
icat
icati
icatio
ication
c
ca
cat
cati
catio
cation
a
at
ati
atio
ation
t
ti
tio
tion
i
io
ion
o
on
n
('app', 3)
