In [None]:
import dapla as dp
import pandas as pd
import datetime
import random
import numpy as np
import requests, json
from pyjstat import pyjstat

### Settings

In [None]:
# Number of years to simulate after the start year
# (upper bound of the year loop at the end of the notebook)
year_sim = 5

In [None]:
# Inputs
# The population path must end in "_<year>_<size>": the next cell parses the
# start year and the nominal population size from its last two "_" fields.
comp_in_path = '/felles/mock_sysselsatte/companies_2019_10000'
pop_in_path = '/felles/mock_sysselsatte/population_2019_10000'

# Load both tables through dapla
companies = dp.read_pandas(comp_in_path)
population = dp.read_pandas(pop_in_path)

In [None]:
# The population path ends in "..._<year>_<size>"; read both fields off its tail.
path_fields = pop_in_path.split('_')
start_year, start_pop_num = int(path_fields[-2]), int(path_fields[-1])

# Actual row counts of the loaded tables
pop_num, comp_num = len(population), len(companies)

print('Start year:\t\t', start_year,
      '\nPopulation size:\t', pop_num,
      '\nCompanies number:\t', comp_num)

In [None]:
# Scale factor from the real 2019 population (5,328,212) down to the mock population
pop_num_ratio = pop_num / 5_328_212

In [None]:
# Preview the loaded companies table
companies

In [None]:
# Preview the loaded population table
population

### Statbank function

In [None]:
def statbank_pandas(url, payload):
    """POST a query to a Statbank table endpoint and return the result as a DataFrame.

    Args:
        url: Address of the table endpoint the query is posted to.
        payload: JSON-serialisable query. The callers in this notebook ask for
            the "json-stat2" response format.

    Returns:
        The dataset converted to a pandas DataFrame by pyjstat.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    resultat = requests.post(url, json=payload)
    # Fail here with the HTTP status rather than letting pyjstat try to parse
    # an error body as a dataset.
    resultat.raise_for_status()
    # Read the response as a pyjstat Dataset object
    dataset = pyjstat.Dataset.read(resultat.text)
    # Write the main result to a dataframe
    return dataset.write('dataframe')

# Populace

### Ageing

In [None]:
# Everyone gets one year older (re-running this cell ages the population again)
population['age'] += 1

In [None]:
# Population after everyone has aged one year
population

### Birth

In [None]:
# Real 2019 figures: 54,495 births in a population of 5,328,212
total_pop = 5_328_212
birth_ratio = 54_495 / total_pop

# Scale the birth rate to the mock population, truncating to whole people
born_num = int(birth_ratio * int(pop_num))
born_num

In [None]:
# Share of boys and girls among newborns, from the 2019 birth counts
male, female = 28042, 26453

births_total = male + female
male_sex_ratio = male / births_total
female_sex_ratio = 1 - male_sex_ratio

male_sex_ratio

In [None]:
def fnr_single(year, sex):
    """Generate a mock 11-digit personal id (fnr) for one person.

    The id is laid out as DDMMYY followed by five random digits. It is a mock
    value only: the trailing digits are drawn at random, not computed.

    Args:
        year: Four-digit birth year; a random day within that year is drawn.
        sex: Sex label. For 'Menn' the five-digit suffix is forced to be even;
            for any other value it is kept as drawn.

    Returns:
        The id as an 11-character string of digits.
    """
    start_date = datetime.date(year, 1, 1)
    end_date = datetime.date(year, 12, 31)

    # (end - start).days is 364 (365 in leap years). randrange excludes its
    # upper bound, so add one to make 31 December drawable as well.
    days_in_year = (end_date - start_date).days + 1
    random_date = start_date + datetime.timedelta(days=random.randrange(days_in_year))
    date = f'{str(random_date.day).zfill(2)}{str(random_date.month).zfill(2)}'

    # Digits 5-6 are the last two digits of the birth year
    fnr = f'{date}{str(year)[2:]}'

    # Last 5 digits are random. randint's upper bound is exclusive, so the draw
    # is at most 99997 and the +1 below can never spill over into six digits.
    last5 = np.random.randint(0, 99998)
    # For men the suffix must be divisible by two: bump odd draws up by one
    if sex == 'Menn' and last5 % 2:
        last5 += 1

    return fnr + str(last5).zfill(5)

In [None]:
####### CHANGE WHEN ITERATING ########
# Newborns are born in the first simulated year
year_birth = int(start_year) + 1

# The first `male_born` newborns are boys, the remainder girls
male_born = int(born_num * male_sex_ratio)

newborn_rows = []
for i in range(born_num):
    sex = 'Menn' if i < male_born else 'Kvinner'
    fnr = fnr_single(year_birth, sex)

    # Positional row, matched against population.columns below
    newborn_rows.append([fnr, None, sex, 0.0, year_birth, float('NaN')])

# DataFrame.append was removed in pandas 2.0 and copied the whole table for
# every row; build the newborns once and concatenate a single time instead.
if newborn_rows:
    newborns = pd.DataFrame(newborn_rows, columns=population.columns)
    population = pd.concat([population, newborns], ignore_index=True)

### Death

In [None]:
url = 'https://data.ssb.no/api/v0/no/table/07995/'

# Single-year age codes "000".."104" plus the open-ended top group "105+"
age_codes = [f'{age:03d}' for age in range(105)] + ['105+']

payload = {
    'query': [
        {'code': 'Kjonn', 'selection': {'filter': 'item', 'values': ['1', '2']}},
        {'code': 'Alder', 'selection': {'filter': 'vs:AlleAldre00B', 'values': age_codes}},
        {'code': 'Tid', 'selection': {'filter': 'item', 'values': ['2019']}},
    ],
    'response': {'format': 'json-stat2'},
}

In [None]:
# Fetch the deaths table using the url and payload defined in the previous cell
death_per_age = statbank_pandas(url, payload)

In [None]:
# Strip the textual suffixes from the age labels and keep the bare number.
# The longer suffix has to go first, since ' år' is a prefix of it.
death_per_age['alder'] = (
    death_per_age['alder']
    .str.replace(' år eller eldre', '')
    .str.replace(' år', '')
    .astype(int)
)

In [None]:
# Deaths per group scaled down to the mock population, rounded to whole people
death_per_age['value_pop_ratio'] = (death_per_age['value'].astype(int) * pop_num_ratio).round()

In [None]:
# The two death rates should match: real data vs. the scaled-down mock counts
real_death_rate = death_per_age['value'].sum() / total_pop
mock_death_rate = death_per_age['value_pop_ratio'].sum() / pop_num
print(real_death_rate)
print(mock_death_rate)

In [None]:
accuracy = 0.01

# Neither of these changes inside the loops, so compute them once
deaths_raw = death_per_age['value'].astype(int)
real_death_rate = death_per_age['value'].sum() / total_pop

# Rounding per group distorts the total. First raise the scale factor in 1 %
# steps while the mock death rate is below the real one ...
while real_death_rate > death_per_age['value_pop_ratio'].sum() / pop_num:
    pop_num_ratio *= 1 + accuracy
    death_per_age['value_pop_ratio'] = (deaths_raw * pop_num_ratio).round()

# ... then lower it in 1 % steps while the mock rate is above the real one
while real_death_rate < death_per_age['value_pop_ratio'].sum() / pop_num:
    pop_num_ratio *= 1 - accuracy
    death_per_age['value_pop_ratio'] = (deaths_raw * pop_num_ratio).round()

In [None]:
# After calibration: the mock death rate should now sit at (or just below) the real one
real_death_rate = death_per_age['value'].sum() / total_pop
mock_death_rate = death_per_age['value_pop_ratio'].sum() / pop_num
print(real_death_rate)
print(mock_death_rate)

In [None]:
# Keep only the age/sex groups in which at least one person dies
has_deaths = death_per_age['value_pop_ratio'].astype(int) > 0
kill_list = death_per_age[has_deaths]

In [None]:
# Age/sex groups with at least one scaled death
kill_list

In [None]:
# For every (sex, age) group, remove the scaled number of deaths at random
for _, group in kill_list.iterrows():
    deaths = int(group['value_pop_ratio'])
    same_age = population['age'] == group['alder']
    same_sex = population['sex'] == group['kjønn']

    candidates = list(population[same_age & same_sex].index)
    random.shuffle(candidates)

    # The slice caps the removal at the group size when it has fewer members
    population = population.drop(candidates[:deaths])

In [None]:
# Nobody older than 105 stays in the population
within_age_cap = population['age'] <= 105
population = population[within_age_cap]

In [None]:
# Population after deaths have been removed
population

### Emigration & Immigration

In [None]:
url = 'https://data.ssb.no/api/v0/no/table/09203/'

# Five-year age groups "00-04".."75-79" plus the open-ended top group "80+"
age_groups = [f'{low:02d}-{low + 4:02d}' for low in range(0, 80, 5)] + ['80+']

payload = {
    'query': [
        {'code': 'Kjonn', 'selection': {'filter': 'item', 'values': ['1', '2']}},
        {'code': 'Alder', 'selection': {'filter': 'item', 'values': age_groups}},
        {'code': 'ContentsCode',
         'selection': {'filter': 'item', 'values': ['Innvandring', 'Utvandring']}},
        {'code': 'Tid', 'selection': {'filter': 'item', 'values': ['2019']}},
    ],
    'response': {'format': 'json-stat2'},
}

In [None]:
# Fetch immigration and emigration counts using the url and payload defined in the previous cell
migrate_age_sex = statbank_pandas(url, payload)

In [None]:
# Split the combined table by measure. Take explicit copies: both frames get a
# 'value_ratio' column assigned later, and assigning into a filtered slice of
# migrate_age_sex triggers pandas' SettingWithCopyWarning.
emigrate_age_sex = migrate_age_sex[migrate_age_sex['statistikkvariabel'] == 'Utvandring'].copy()
immigrate_age_sex = migrate_age_sex[migrate_age_sex['statistikkvariabel'] == 'Innvandring'].copy()

In [None]:
# Emigrants as a share of the real population ...
emigrat_ratio = emigrate_age_sex['value'].sum() / total_pop
# ... and the matching head count for the mock population (truncated)
emi_target = int(emigrat_ratio * pop_num)
emi_target

In [None]:
# First guess at the per-group emigrant counts.
# NOTE(review): emigrat_ratio is the emigration rate here, not a population
# scale factor; the calibration loops further down rescale it until the totals match.
emigrate_age_sex['value_ratio'] = emigrate_age_sex['value'].astype(int).mul(emigrat_ratio)

In [None]:
# Total emigrants after rounding each group, before calibration
emigrate_age_sex['value_ratio'].round().sum()

In [None]:
accuracy = 0.01

# Neither of these changes inside the loops, so compute them once
emigrants_raw = emigrate_age_sex['value'].astype(int)
real_emi_rate = emigrate_age_sex['value'].sum() / total_pop

# Raise the factor in 1 % steps while the mock emigration rate is below the real one ...
while real_emi_rate > emigrate_age_sex['value_ratio'].sum() / pop_num:
    emigrat_ratio *= 1 + accuracy
    emigrate_age_sex['value_ratio'] = (emigrants_raw * emigrat_ratio).round()

# ... then lower it in 1 % steps while the mock rate is above the real one
while real_emi_rate < emigrate_age_sex['value_ratio'].sum() / pop_num:
    emigrat_ratio *= 1 - accuracy
    emigrate_age_sex['value_ratio'] = (emigrants_raw * emigrat_ratio).round()

print(emigrat_ratio)

In [None]:
# Total emigrants after calibration
emigrate_age_sex['value_ratio'].round().sum()

In [None]:
# Keep only the groups with at least one emigrant. Copy so that the age-bound
# columns added to emi_list afterwards are not written into a slice of
# emigrate_age_sex (SettingWithCopyWarning).
emi_list = emigrate_age_sex[emigrate_age_sex['value_ratio'].astype(int) > 0].copy()

In [None]:
# Old emigration (disabled): dropped emi_target random people, ignoring age and sex
#emi_list = list(population.index)
#random.shuffle(emi_list)
#population = population.drop(emi_list[:emi_target])
#population

In [None]:
# Turn the age-group label into numeric bounds. Pull the numbers out with a
# regex instead of split("-"): an open-ended top group has no hyphen and only
# one number, which made the split + astype(int) version raise.
age_bounds = emi_list['alder'].str.extract(r'(\d+)\D*(\d+)?')
emi_list = emi_list.assign(
    alder1=age_bounds[0].astype(int),
    # Open-ended groups have no upper bound; cap them at 105, the oldest age
    # the notebook keeps in the population.
    alder2=pd.to_numeric(age_bounds[1]).fillna(105).astype(int),
).drop("alder", axis='columns')

In [None]:
# First rows of the emigration groups, now with numeric age bounds
emi_list.head(3)

In [None]:
# For every (sex, age range) group, remove the scaled number of emigrants at random
for _, group in emi_list.iterrows():
    sex = group['kjønn']
    kill_num = int(group['value_ratio'])
    age1, age2 = group['alder1'], group['alder2']

    print('Emigrate', kill_num, sex, 'between age', age1, 'and', age2 )

    in_age_range = (population['age'] >= age1) & (population['age'] <= age2)
    selection = list(population[in_age_range & (population['sex'] == sex)].index)
    random.shuffle(selection)

    # The slice caps the removal at the group size when it has fewer members
    leaving = selection[:kill_num]
    print(leaving)

    population = population.drop(leaving)

In [None]:
# Population after emigration
population

### Immigration

In [None]:
# Immigrants as a share of the real population ...
immigrat_ratio = immigrate_age_sex['value'].sum() / total_pop
# ... and the matching head count for the mock population (truncated)
immi_target = int(immigrat_ratio * pop_num)
immi_target

In [None]:
# First guess at the per-group immigrant counts.
# NOTE(review): immigrat_ratio is the immigration rate here, not a population
# scale factor; the calibration loops further down rescale it until the totals match.
immigrate_age_sex['value_ratio'] = immigrate_age_sex['value'].astype(int).mul(immigrat_ratio)

In [None]:
# Total immigrants after rounding each group, before calibration
immigrate_age_sex['value_ratio'].round().sum()

In [None]:
accuracy = 0.01

# Neither of these changes inside the loops, so compute them once
immigrants_raw = immigrate_age_sex['value'].astype(int)
real_immi_rate = immigrate_age_sex['value'].sum() / total_pop

# Raise the factor in 1 % steps while the mock immigration rate is below the real one ...
while real_immi_rate > immigrate_age_sex['value_ratio'].sum() / pop_num:
    immigrat_ratio *= 1 + accuracy
    immigrate_age_sex['value_ratio'] = (immigrants_raw * immigrat_ratio).round()

# ... then lower it in 1 % steps while the mock rate is above the real one
while real_immi_rate < immigrate_age_sex['value_ratio'].sum() / pop_num:
    immigrat_ratio *= 1 - accuracy
    immigrate_age_sex['value_ratio'] = (immigrants_raw * immigrat_ratio).round()

print(immigrat_ratio)

In [None]:
# Total immigrants after calibration
immigrate_age_sex['value_ratio'].round().sum()

In [None]:
# Keep only the groups with at least one immigrant. Copy so that the age-bound
# columns added to immi_list afterwards are not written into a slice of
# immigrate_age_sex (SettingWithCopyWarning).
immi_list = immigrate_age_sex[immigrate_age_sex['value_ratio'].astype(int) > 0].copy()

In [None]:
# Old emigration code (disabled; left over from the emigration section, not used for immigration)
#emi_list = list(population.index)
#random.shuffle(emi_list)
#population = population.drop(emi_list[:emi_target])
#population

In [None]:
# Turn the age-group label into numeric bounds. Pull the numbers out with a
# regex instead of split("-"): an open-ended top group has no hyphen and only
# one number, which made the split + astype(int) version raise.
age_bounds = immi_list['alder'].str.extract(r'(\d+)\D*(\d+)?')
immi_list = immi_list.assign(
    alder1=age_bounds[0].astype(int),
    # Open-ended groups have no upper bound; cap them at 105, the oldest age
    # the notebook keeps in the population.
    alder2=pd.to_numeric(age_bounds[1]).fillna(105).astype(int),
).drop("alder", axis='columns')

In [None]:
# Total number of immigrants over the kept groups
immi_list['value_ratio'].sum()

In [None]:
# Peek at the table layout before immigrant rows are appended
population.head(3)

In [None]:
count = 0
immigrant_rows = []

# For every (sex, age range) group, create the scaled number of immigrants
for _, row in immi_list.iterrows():
    sex = row['kjønn']
    num = int(row['value_ratio'])
    age1 = row['alder1']
    age2 = row['alder2']

    print('Immigrate', num, sex, 'between age', age1, 'and', age2 )

    # `_` instead of `i`: the inner loop used to shadow the outer loop variable
    for _ in range(num):
        # randint's upper bound is exclusive, hence the + 1
        age = np.random.randint(age1, age2 + 1)

        ######## CHANGE WITH ITERATIONS ########
        # NOTE(review): newborns use start_year + 1 as the current year, while
        # this uses start_year - confirm which year the age refers to.
        year_birth = start_year - age

        fnr = fnr_single(year_birth, sex)

        # Store the drawn age. The row used to carry the newborn value 0.0
        # here, so every immigrant entered the population as a 0-year-old.
        # Assumes the fourth column is 'age', as in the newborn rows - TODO confirm.
        immigrant_rows.append([fnr, None, sex, float(age), year_birth, float('NaN')])
        count += 1

# DataFrame.append was removed in pandas 2.0 and copied the whole table for
# every row; build the immigrants once and concatenate a single time instead.
if immigrant_rows:
    immigrants = pd.DataFrame(immigrant_rows, columns=population.columns)
    population = pd.concat([population, immigrants], ignore_index=True)

print(count)

In [None]:
# What sort of people immigrate? Inspect the population after the immigrant rows were appended.
population

#### Remove dupes on person-id

In [None]:
# Randomly generated ids can collide; keep the first row for each person id
population = population.drop_duplicates('id')

# Companies

### Out of business

### New companies
Add one employee to each new company

#### Remove dupes on company ids

In [None]:
# Keep the first row for each company id
companies = companies.drop_duplicates('work_id')

# Work

### Remove workers from non-existing companies

### Firings

### Pensions

### Hirings

### Re-establish work-percentage distribution

# Checks

### Companies without employees

In [None]:
# Loop over the simulated years: start_year + 1 up to and including start_year + year_sim
first_sim_year = int(start_year) + 1
for year in range(first_sim_year, first_sim_year + year_sim):
    print(year)

    # Do functions on datasets

    # Store datasets with year extension
    
    