In [1]:
import gspread
import pandas as pd
from google.oauth2.service_account toimport Credentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

# Transportation problem
Problem: Lack of shipping containers and European trucks, difficulty reaching global markets due to restrictions, unbalanced mill operations in new demand circumstances.​
Limited export possibilities : 2 ports, Turkey, railway.
Request: calculate the best transportation allocation post-sanctions, based on 2021 data.
Data: list of deliveries for each customer
## Steps to accomplish
1) To sort out which deliveries are still relevant post sanctions. All EU28 countries stopped trading with Russia, as well as a handful of non-EU countries
2) To calculate the new allocation of transport per mill depending on the maximum capacity and cost per deliveries.

In [2]:
#Create API and Service credential on Google Cloud project, download and rename json
# mkdir %APPDATA%\gspread
# move credentials.json %APPDATA%\gspread\service_account.json
# gc = gspread.oauth()
sa = gspread.service_account()
sh = sa.open("data")

In [3]:
wks = sh.worksheet('Исходные данные')
#create a blank worksheet to keep the original untouched
wks1 = sh.worksheet('разбор')
#to format the number with commas instead of do-ts
wks1.format('E', {'numberFormat': {'type' : 'NUMBER', 'pattern': '0,0#'}})
wks2 = sh.worksheet('производительность установки')
wks2.format('E', {'numberFormat': {'type' : 'NUMBER', 'pattern': '0,0#'}})
wks3 = sh.worksheet('расстояние')
wks4 = sh.worksheet('спрос и предложение')
data = wks.get_all_values()
headers = data.pop(0)

In [4]:
df = pd.DataFrame(data, columns=headers)
#Remove Upper case
df.columns= df.columns.str.lower()
df.describe()

Unnamed: 0,точка отгрузки,страна назначения,страна,зона доставки в стране назначения,"стоимость (для ru - рубли, остальное - евро)",номер перевозки,месяц перевозки
count,12967,12967,12967,12967,12967,12967,12967
unique,7,32,32,73,442,12956,12
top,КОСТРОМА,RU,Russian Federation,центр,"17 000,00",10083439,Июнь
freq,3892,3972,3972,3533,493,2,1318


In [5]:
df['точка отгрузки'].value_counts()

КОСТРОМА      3892
УСТЬ-ИЖОРА    2540
НОВАТОР       1791
МАНТУРОВО     1446
СИНЯЧИХА      1396
УРАЛЬСКИЙ     1040
ТЮМЕНЬ         862
Name: точка отгрузки, dtype: int64

In [6]:
#Check if any empty rows
df.isnull().sum()

точка отгрузки                                  0
страна назначения                               0
страна                                          0
зона доставки в стране назначения               0
стоимость (для ru - рубли, остальное - евро)    0
номер перевозки                                 0
месяц перевозки                                 0
dtype: int64

In [7]:
# coco best classification type for our case is "EU" but some non-EU candidates joined sanctions
# check list https://notebook.community/konstantinstadler/country_converter/doc/country_converter_examples 
import country_converter as coco
cc = coco.CountryConverter()
df['EU'] = cc.pandas_convert(series=df['страна назначения'], to='EU') 
# Let's classify Russian Federation in our new column and check what are the others left
df['страна'].loc[df['EU'] == 'not found'].unique()

array(['Russian Federation', 'China', 'United States', 'Albania',
       'Norway', 'Switzerland', 'Serbia', 'Turkey',
       'Bosnia and Herzegovina', 'Uzbekistan', 'Belarus'], dtype=object)

In [8]:
#We start filtering out our non-EU countries that joined the sanctions  
df.loc[(df['EU'] == 'EU')|(df['страна'] == 'Albania')|
(df['страна'] == 'Bosnia and Herzegovina')|
(df['страна'] == 'Switzerland')|
(df['страна'] == 'Norway')|
(df['страна'] == 'United States')
, 'санкции'] = True
df['санкции'].fillna(value=False, inplace=True)

## Mill capacity

Objective = supply chain redistribution at lowest costs

Capacity used as a proxy is that each row per factory is considered as a unit of capacity. therefore, we count the total deliveries per mill and calculate the percentage of sanctioned deliveries, per mill, out of total capacity

In [9]:
#We need to rename to use it as a method
df.rename(columns = {'стоимость (для ru - рубли, остальное - евро)': 'стоимость'}, inplace = True)
#Problems with Novator for our localisation later
df = df.replace({'точка отгрузки':{'НОВАТОР':'ОПАЛИПСОВО'}})
df.стоимость = df.стоимость.str.replace(' ', '')
#Remove unicode, set locale method doesn't work but is the usual procedure
#we find the exact piece of text to replace by seeing the error when we wanted to convert to numeric
df['стоимость'].str.encode('ascii', 'ignore').str.decode('ascii')
df.стоимость = df.стоимость.str.replace('\u202f', '')
df.стоимость = df.стоимость.str.replace(',', '.')
#use locale to set to_numeric default use
import locale, pprint, time, calendar
locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, ('ru_RU', 'UTF-8'))
# doesn't work, could not convert to float using locale because contains unicode
df['стоимость'] = pd.to_numeric(df['стоимость'])
#add currency in separate column
df.loc[(df['страна'] !='Russian Federation'), 'валюта'] = 'EUR'
df.loc[(df['страна'] =='Russian Federation'), 'валюта'] = 'RUB'

In [10]:
#we take a dataframe with sanctions
# NB on method count = takes only non NaN values
df_sanction = df[~df['санкции'] == False]
sanction = df_sanction.groupby(['точка отгрузки']).санкции.count().reset_index()
total = df.groupby(['точка отгрузки']).страна.count().reset_index()
total.rename(columns = {'страна': 'поставка'}, inplace = True)
# We count the cost of the sanctioned deliveries
cost_sanctioned = df_sanction.groupby(['точка отгрузки']).стоимость.sum().reset_index()

In [11]:
performance = sanction.merge(total)
performance['процент поставок под действие санкций'] = (performance.санкции/performance.поставка)*100
performance = performance.merge(cost_sanctioned, how='inner')
performance.rename(columns = {'стоимость': 'общая стоимость поставок под действие санкций (EUR)'}, inplace = True)
no_sanction = performance.поставка - performance.санкции
performance.insert(loc = 3, column = 'без санкции',value = no_sanction)

In [12]:
#Checking if the numbers of our new table match with our df
print(df['точка отгрузки'].value_counts().sum())
print(df['санкции'].value_counts().sum())

12967
12967


In [13]:
#Upload final results in Google sheet
wks2.clear()
from gspread_dataframe import get_as_dataframe, set_with_dataframe
set_with_dataframe(worksheet=wks2, dataframe=performance, include_index=False,
include_column_header=True, resize=True)

## A linear function to optimize transportation costs
Objective = optimal transportations with minimum costs
Decision variable = mill capacity, VC and FC per mill
Constraints = Demand, maximum capacity per mill

SUPPLY: as a proxy, the maximum capacity (sum of number of deliveries) per mill has already been calculated previously. 

DEMAND: the sum of number of deliveries per destination (non sanctioned) will be used as a proxy

COST: We have our cost per delivery, but we need a proxy. Let's use only our non sanctioned deliveries, the rest is irrelevant for our solution
### Steps to accomplish: 
DEMAND: sum of deliveries per destination not sanctioned

COST: convert all in functional currency (RUB) at transaction date, calculate cost per km per mill-destination

Then create linear function using PulP

In [14]:
#we want to have a correct date formatting. Transform the month into a datetime format and correct the year (because it only contains the month, it sets default to year 1900)
df.rename(columns = {'месяц перевозки': 'месяц_перевозки'}, inplace = True)
df['месяц_перевозки'] = pd.to_datetime(df['месяц_перевозки'], format='%B')
df['месяц_перевозки'] = df['месяц_перевозки'].mask(df['месяц_перевозки'].dt.year == 1900, 
                             df['месяц_перевозки'] + pd.offsets.DateOffset(year=2021))
#Better not use apply because it's a loop function when you want to apply for a whole df a vectorised solution is better
#We now have our correct date, we can calculate historical XR with a set library
df['месяц_перевозки'] = pd.to_datetime(df.месяц_перевозки, unit='D').sort_index()

  df['месяц_перевозки'] + pd.offsets.DateOffset(year=2021))


In [15]:
from forex_python.converter import CurrencyRates
from datetime import datetime
c = CurrencyRates()
# get rates for all unique dates, added tqdm progress bar to see progress
from tqdm import tqdm  
rates_dict = pd.DataFrame({date: CurrencyRates().get_rates('EUR', date_obj=pd.to_datetime(date, unit='D'))
              for date in tqdm(df['месяц_перевозки'].unique())})
# now use these rates to set cost to 1/(RUB to currency_x rate), except when currency is RUB and when servicecost is 0, in those cases just use servicecost
#df['XR'] = df.apply(lambda row: 1.0/rates_dict[row['месяц_перевозки']][row['валюта']]*row['стоимость'] if row['валюта']!='RUB' and row['стоимость'] != 0 else row['стоимость'], axis=1)

100%|██████████| 12/12 [00:05<00:00,  2.27it/s]


In [16]:
rates_rub = rates_dict.T
rates_rub = rates_rub.loc[:,['RUB']]
rates_rub.index.name = 'месяц_перевозки'

In [17]:
df = df.merge(rates_rub,on='месяц_перевозки')
df.rename(columns = {'RUB': 'XR'}, inplace = True)
df.loc[(df['страна'] =='Russian Federation'),'XR'] = 1

In [18]:
df['стоимость_РУБ'] = df.стоимость * df.XR

In [19]:
print(df.shape[1])
df.columns

12


Index(['точка отгрузки', 'страна назначения', 'страна',
       'зона доставки в стране назначения', 'стоимость', 'номер перевозки',
       'месяц_перевозки', 'EU', 'санкции', 'валюта', 'XR', 'стоимость_РУБ'],
      dtype='object')

In [20]:
#We upload the final result to Google Sheet
wks1.clear()
from gspread_dataframe import get_as_dataframe, set_with_dataframe
set_with_dataframe(worksheet=wks1, dataframe=df, include_index=False,
include_column_header=True, resize=True)

In [21]:
#we want to calculate the cost and demand per destination
df_no_sanction = df[~df['санкции'] == True]
#formatting to transform
df_no_sanction.rename(columns = {'зона доставки в стране назначения': 'город'}, inplace = True)
#remove whitespace, homogenize each delivery zone
df_no_sanction['страна'] = df_no_sanction['страна'].str.strip()
df_no_sanction['город'] = df_no_sanction['город'].str.upper()
df_no_sanction['город'] = df_no_sanction['город'].str.strip()
#Generating a list to check individually, there are duplicates and formatting errors
list_zones = df_no_sanction['город'].unique()
list_inp = list_zones.tolist()
for item in list_inp: 
    print(item) 

ГРОЗНЫЙ
ВОСТОК
САНКТ-ПЕТЕРБУРГ
ЯНИНО
ОМСК
ЕКАТЕРИНБУРГ
УРАЛЬСКИЙ
СПБ
CПБ
КАЛУГА
ВОСТОК МО
МОСКВА
КОВРОВ
ЮГ МО 1
КОСТРОМА
ЦЕНТР
ПОКРОВ
СЕВЕР МО 1
УФА
ТАМБОВ
ИВАНОВО
ТЮМЕНЬ
ЗАПАД МО
СЕВЕР МО 2
ДЗЕРЖИНСК
СЕВЕР
GOMEL
КАЛИНИНГРАД
НОВОСИБИРСК
МАЛОЯРОСЛАВЕЦ
MANTUROVO
ПЕРМЬ
СЕВЕРО-ЗАПАД
SLOBODA
КРАСНОДАР
СОЧИ
ЮГ
СУРГУТ
ST. PETERSBURG
ВОРОНЕЖ
УЛЬЯНОВСК
СТАРЫЙ ОСКОЛ
НИЖНИЙ НОВГОРОД
КАЗАНЬ
CЕВЕРО-ЗАПАД
ПОВОЛЖЬЕ
ЧЕРЕПОВЕЦ
НОВАТОР
UZBEKISTAN
ЗАПАД
POLOCHANY
МУРМАНСК
БАТАЙСК
КИРОВ
ТВЕРЬ
СЫСЕРТЬ
МАНТУРОВО


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_sanction.rename(columns = {'зона доставки в стране назначения': 'город'}, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_sanction['страна'] = df_no_sanction['страна'].str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_sanction['город'] = df_no_sanction['город'].str.upper()
A value is trying to be set on a copy of

In [22]:
len(df_no_sanction)

4333

In [23]:
#Using our list previously we start correcting. First, replace cities of SPb under one and replace latin character
df_no_sanction = df_no_sanction.replace({'город': 
{'СПБ':'САНКТ-ПЕТЕРБУРГ','CПБ':'САНКТ-ПЕТЕРБУРГ','ST. PETERSBURG':'САНКТ-ПЕТЕРБУРГ','CЕВЕРО-ЗАПАД':'СЕВЕРО-ЗАПАД'
}})
#To calculate distance we need to replace the region by some cities (using Google search)
#NOVATOR is confusing, we replace by a city close to Sveza NOVATOR mill
df_no_sanction.loc[(df_no_sanction['страна'] == 'Russian Federation', 'город')] = df_no_sanction.replace({'город': 
{'MANTUROVO':'МАНТУРОВО','ТРОГИНО':'CТРОГИНО','СЕВЕР МО 1':'ХОРУГВИНО','СЕВЕР МО 2':'СОЛНЕЧНОГОРСК','ЮГ МО 1':'ГОРКИ ЛЕНИНСКИЕ','ЗАПАД МО':'ВОЛОКОЛАМСК','ВОСТОК МО':'ОРЕХОВО-ЗУЕВО',
'ЦЕНТР':'МОСКВА','СЕВЕР':'АРХАНГЕЛЬСК','ЗАПАД':'СМОЛЕНСК','ЮГ':'СОЧИ','ВОСТОК':'ХАБАРОВСК','СЕВЕРО-ЗАПАД':'САНКТ-ПЕТЕРБУРГ', 
}})
#Same but for non-RU cities
df_no_sanction_noRU = df_no_sanction[df_no_sanction['страна'] != 'Russian Federation']
#list_cities_noRU = df_no_sanction_noRU.groupby(['страна','город'])['страна назначения'].count().reset_index()
#Slabada is an ancient name for many villages that used to be part of Russia, found in Belarus and Ukraine.. 
df_no_sanction.loc[(df_no_sanction['страна'] == 'Belarus', 'город')] = df_no_sanction.replace({'город': {'ЦЕНТР':'МИНСК','GOMEL':'ГОМЕЛ','POLOCHANY':'ПОЛОЧАНЫ','SLOBODA':'ВИТЕБСК'}})
df_no_sanction.loc[(df_no_sanction['страна'] == 'China', 'город')] = df_no_sanction.replace({'город': {'ЦЕНТР':'ПЕКИН','ВОСТОК':'ШАНХАЙ'}})
df_no_sanction.loc[(df_no_sanction['страна'] == 'Serbia', 'город')] = df_no_sanction.replace({'город': {'ЦЕНТР':'БЕЛГРАД'}})
df_no_sanction.loc[(df_no_sanction['страна'] == 'Turkey', 'город')] = df_no_sanction.replace({'город': {'ЦЕНТР':'АНКАРА','СЕВЕР':'ИСТАНБУЛ','ЮГ': 'МЕРСИН','ЗАПАД': 'КАРС'}})
df_no_sanction.loc[(df_no_sanction['страна'] == 'Uzbekistan', 'город')] = df_no_sanction.replace({'город': {'UZBEKISTAN':'ТАШКЕНТ'}})

In [24]:
#Create one column to get the combination country-city delivery point
df_no_sanction.страна = df_no_sanction.страна.astype(str)
df_no_sanction.город = df_no_sanction.город.astype(str)
df_no_sanction.страна.str.encode('ascii', 'ignore').str.decode('ascii')

0        Russian Federation
1        Russian Federation
2        Russian Federation
3        Russian Federation
4        Russian Federation
                ...        
12582                Serbia
12589                Turkey
12590                Turkey
12591                Turkey
12799                Turkey
Name: страна, Length: 4333, dtype: object

In [25]:
df_no_sanction['страна_зона'] = df_no_sanction.страна + " " + df_no_sanction.город
df_no_sanction[['страна_зона']] = df_no_sanction[['страна_зона']].astype(str)

In [26]:
if True in df_no_sanction['страна'].apply(lambda x: x.isascii() if isinstance(x, str) else False).to_list():
    print('at least one ascii')
else:
    print('no ascii')

at least one ascii


In [27]:
print(df_no_sanction.columns)

Index(['точка отгрузки', 'страна назначения', 'страна', 'город', 'стоимость',
       'номер перевозки', 'месяц_перевозки', 'EU', 'санкции', 'валюта', 'XR',
       'стоимость_РУБ', 'страна_зона'],
      dtype='object')


In [28]:
demand = df_no_sanction.groupby(['страна','точка отгрузки','город','страна_зона','стоимость_РУБ'])['страна назначения'].count().reset_index()
demand.rename(columns = {'стоимость_РУБ': 'предложение_RUB'}, inplace = True)
demand.rename(columns = {'страна назначения': 'счет'}, inplace = True)
demand.drop(['счет'], axis=1)
#we take our total number per mill from our previous table
demand = demand.merge(total)
demand_per_combination  =  df_no_sanction.groupby(['страна_зона'])['страна назначения'].count().reset_index()
demand_per_combination.rename(columns = {'страна назначения': 'итоге_страна_зона'}, inplace = True)
demand = demand.merge(demand_per_combination)

In [29]:
# Calculate supply (total per mill) = done
# Converted all deliveries at historic rate (in RUB) = done
# Check and remove outliers per destination
import seaborn as sns
import warnings
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
%matplotlib inline
sns.set()
# Graphics in retina format are more sharp and legible
%config InlineBackend.figure_format = 'retina'

In [30]:
import geocoder
from pprint import pprint
key = '484ae73bfa5b43d28bde740af59c0c0c' # get api key from: https://opencagedata.com
mills = dict.fromkeys(demand['точка отгрузки'])
cities = dict.fromkeys(demand['город'])
for name in mills:
    result = geocoder.opencage(name, key=key)
    mills[name] = result.latlng
for name in cities:
    result = geocoder.opencage(name, key=key)
    cities[name] = result.latlng

In [31]:
#Transform, rename, merge
mills_distance = pd.DataFrame.from_dict(mills, orient='index',columns=['lat_точка','long_точка'])
cities_distance = pd.DataFrame.from_dict(cities, orient='index',columns=['lat_город','long_город'])
mills_distance.index.name = 'точка отгрузки'
cities_distance.index.name = 'город'
demand = demand.merge(mills_distance, left_on='точка отгрузки', right_on='точка отгрузки')
demand = demand.merge(cities_distance, left_on='город', right_on='город')

In [32]:
#Calculate distance between two points, calculate cost per km = proxy for cost minimization function
import h3
demand['расстояние'] = demand.apply(lambda row: h3.point_dist((row['lat_точка'], row['long_точка']), (row['lat_город'], row['long_город'])), axis=1)
demand['стоимость за километр'] = demand.предложение_RUB/demand.расстояние

In [33]:
#issue with inf type of float, which json doesn't recognise
demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']] = demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']].astype(str)
demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']] = demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']].astype(str)

In [34]:
#Some rows are very high in comparison to the distance, meaning the unit sold where much bigger than the km traveled
demand_SPB = demand[demand['страна_зона'] == 'Russian Federation_САНКТ-ПЕТЕРБУРГ']
demand_SPB = demand_SPB.sort_values(by='стоимость за километр', ascending = False)

In [35]:
#Upload final results in Google sheet
wks4.clear()
from gspread_dataframe import get_as_dataframe, set_with_dataframe
set_with_dataframe(worksheet=wks4, dataframe=demand, include_index=False,
include_column_header=True, resize=True)

In [36]:
#issue with inf type of float, which json doesn't recognise
demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']] = demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']].astype(float)
demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']] = demand[["lat_точка", "long_точка", "lat_город", "long_город", 'расстояние','стоимость за километр']].astype(float)

In [37]:
#Define translating def
import cyrtranslit
def translation(cyrillic):
    return cyrtranslit.to_latin(cyrillic,'ru')

In [38]:
#Need to put everything in UTF
demand_per_combination_latin = demand_per_combination[:]
demand_per_combination_latin['страна_зона'] = demand_per_combination_latin['страна_зона'].apply(lambda x: cyrtranslit.to_latin(x,'ru'))
demand_latin = demand[:]
demand_latin['точка отгрузки'] = demand_latin['точка отгрузки'].apply(lambda x: cyrtranslit.to_latin(x,'ru'))
demand_latin['страна_зона'] = demand_latin['страна_зона'].apply(lambda x: cyrtranslit.to_latin(x,'ru'))
demand_latin.страна_зона = demand_latin.страна_зона.str.replace("-", " ")
demand_per_combination_latin.страна_зона = demand_per_combination_latin.страна_зона.str.replace("-", " ")

In [39]:
distance = pd.pivot_table(demand_latin,values='расстояние',index=['точка отгрузки','страна_зона'], aggfunc='first')
cost = pd.pivot_table(demand_latin,values='стоимость за километр',index=['точка отгрузки','страна_зона'], aggfunc='first')

In [40]:
#Upload final results in Google sheet
wks3.clear()
from gspread_dataframe import get_as_dataframe, set_with_dataframe
set_with_dataframe(worksheet=wks3, dataframe=distance, include_index=True,
include_column_header=True, resize=True)

In [41]:
mills_latin = pd.Series(mills.keys()).apply(lambda x: cyrtranslit.to_latin(x,'ru'))
mills_final = dict.fromkeys(mills_latin.tolist())

In [42]:
mills

{'КОСТРОМА': [57.7679158, 40.9269141],
 'СИНЯЧИХА': [57.9891499, 61.5885861],
 'ТЮМЕНЬ': [57.153534, 65.542274],
 'УРАЛЬСКИЙ': [57.9333616, 55.5175276],
 'МАНТУРОВО': [58.333271, 44.761791],
 'ОПАЛИПСОВО': [60.73188, 46.1883074],
 'УСТЬ-ИЖОРА': [59.803871, 30.59951]}

In [43]:
for key, value in mills_final.items():
    for key2 in mills:
        mills_final[key] = mills[key2]
        break

In [44]:
#Develop new demand forecasts (less in SPb and Belgrade, more in Asian routes)
#Re-allocate 4333 non sanctioned orders + orders from SPb
adapted_mship = demand_latin[:]
for i in adapted_mship.страна_зона:
    if i in {'Russian Federation SOCHI','Russian Federation XABAROVSK','Russian Federation NOVOSIBIRSK','Turkey MERSIN','Uzbekistan TASHKENT'}:
        adapted_mship.loc[(adapted_mship['страна_зона'] == i),'итоге_страна_зона'] = adapted_mship.loc[(adapted_mship['страна_зона'] == i),'итоге_страна_зона'] + 750
    elif i in {'Russian Federation_САНКТ-ПЕТЕРБУРГ'}:
        adapted_mship.loc[(adapted_mship['страна_зона'] == i),'итоге_страна_зона'] = adapted_mship.loc[(adapted_mship['страна_зона'] == i),'итоге_страна_зона'] - 1225
    else:
        pass

In [45]:
#we start using our solver, set up
from pulp import *
#SETS
mills = list(mills_final.keys())
destinations = list(adapted_mship['страна_зона'].unique())
#dict of amount that can be shipped to each
#Need to change to dict into sum of orders from the country I guess
mship = adapted_mship.set_index('страна_зона').to_dict()['итоге_страна_зона']
#dict of max supply
supply = adapted_mship.set_index('точка отгрузки').to_dict()['поставка']
#dict of distance for all mills and destinations
adapted_mship['расстояние'] = adapted_mship['расстояние'].astype(float)
distance_set = adapted_mship[['точка отгрузки','страна_зона','расстояние']]
distance_set.rename(columns = {'точка отгрузки': 'точка'}, inplace = True)
from collections import defaultdict
d = defaultdict(dict)
for i, row in distance_set.iterrows():
    d[row.точка][row.страна_зона] = row.drop(['точка', 'страна_зона']).to_dict()
distance_dict = dict(d)

In [46]:
destinations

['Belarus VITEBSK',
 'Belarus GOMEL',
 'Belarus MINSK',
 "Belarus POLOCHANY'",
 'Russian Federation GORKI LENINSKIE',
 'Russian Federation DZERZHINSK',
 'Russian Federation IVANOVO',
 "Russian Federation KAZAN'",
 'Russian Federation KOVROV',
 'Russian Federation KOSTROMA',
 'Russian Federation MANTUROVO',
 'Russian Federation MOSKVA',
 'Russian Federation NOVATOR',
 "Russian Federation POVOLZH'E",
 'Russian Federation SANKT PETERBURG',
 'Russian Federation SOLNECHNOGORSK',
 'Russian Federation SOCHI',
 "Russian Federation STARY'J OSKOL",
 'Russian Federation TAMBOV',
 "Russian Federation UL'YANOVSK",
 'Russian Federation XORUGVINO',
 'Serbia BELGRAD',
 'Turkey ANKARA',
 'Turkey ISTANBUL',
 'Turkey KARS',
 'Turkey MERSIN',
 'Russian Federation OREXOVO ZUEVO',
 "Russian Federation TYUMEN'",
 'Russian Federation VORONEZH',
 'Russian Federation EKATERINBURG',
 'Russian Federation NOVOSIBIRSK',
 'Russian Federation SURGUT',
 "Russian Federation URAL'SKIJ",
 'Russian Federation XABAROVSK',


In [47]:
#we grab the value inside the dict to bring it at the right level
for mill, info in distance_dict.items():
    for k in info:
        info[k] = list(info[k].values())[0]

In [48]:
#Set problem variable
prob = LpProblem("Transportation", LpMinimize)
routes = [(i,j) for i in mills for j in destinations]
#DECISION VARIABLE
amount_vars = LpVariable.dicts('ShipAmount',(mills,destinations),0)

In [None]:
#OBJECTIVE FUNCTION
prob += lpSum(amount_vars[i][j]*distance_dict[i][j] for (i,j) in routes)
#CONSTRAINTS
for j in destinations:
    prob += lpSum(amount_vars[i][j] for i in mills <= mship[j])
for i in mills:
    prob += lpSum(amount_vars[i][j] for j in destinations == supply[i])
prob.solve()
print('Status', LpStatus[prob.status])

for v in prob.variables():
    if v.varValue > 0:
        print(v.name, '=', v.varValue)
print('total =', value(prob.objective))

# Visualisation part
The part below contains different scatterplots to visualise the data

In [50]:
#Let's visualise the production capacity on the map
demand_RU = demand[demand['страна'] == 'Russian Federation']
viz_RU = demand_RU.groupby(['точка отгрузки','поставка','lat_точка','long_точка']).город.count().reset_index()
viz_RU

Unnamed: 0,точка отгрузки,поставка,lat_точка,long_точка,город
0,КОСТРОМА,3892,57.767916,40.926914,113
1,МАНТУРОВО,1446,58.333271,44.761791,92
2,ОПАЛИПСОВО,1791,60.73188,46.188307,85
3,СИНЯЧИХА,1396,57.98915,61.588586,74
4,ТЮМЕНЬ,862,57.153534,65.542274,55
5,УРАЛЬСКИЙ,1040,57.933362,55.517528,72
6,УСТЬ-ИЖОРА,2540,59.803871,30.59951,96


In [51]:
import plotly.express as px
color_scale = [(0, 'blue'), (1,'red')]

fig = px.scatter_mapbox(demand, 
                        lat="lat_город", 
                        lon="long_город", 
                        hover_name="город", 
                        hover_data=["точка отгрузки", "стоимость за километр"],
                        color="стоимость за километр",
                        color_continuous_scale=color_scale,
                        size="стоимость за километр",
                        zoom=8, 
                        height=700,
                        width=700)

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [52]:
import plotly.express as px
color_scale = [(0, 'orange'), (1,'red')]

fig = px.scatter_mapbox(viz_RU, 
                        lat="lat_точка", 
                        lon="long_точка", 
                        hover_name="точка отгрузки", 
                        hover_data=["точка отгрузки", "поставка"],
                        color="город",
                        color_continuous_scale=color_scale,
                        size="город",
                        zoom=8, 
                        height=700,
                        width=700)

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()