<a href="https://colab.research.google.com/github/boco129/hoge/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
import random
from scipy.special import ndtri
from  typing import Dict, List
import numpy as np
import pandas as pd
import time
from joblib import Parallel, delayed

from scipy.stats import norm

def my_ndtri(p):
    """Return the standard-normal quantile (inverse CDF) for probability ``p``.

    Thin wrapper that delegates to ``scipy.stats.norm.ppf``.
    """
    quantile = norm.ppf(p)
    return quantile

# The functions below use my_ndtri to turn uniform draws into normal draws.

def gaussian_random(num_period, coefficient):
    """Draw two series of normal shocks, each ``num_period`` long.

    ``rnd_revenue`` holds independent draws obtained by pushing
    ``random.random()`` through ``my_ndtri``.  Each ``rnd_drift`` entry mixes
    the matching revenue shock (weight ``coefficient``) with a fresh draw
    (weight ``sqrt(1 - coefficient**2)``).

    Returns:
        dict with the keys ``'rnd_revenue'`` and ``'rnd_drift'`` (lists).
    """
    # All revenue shocks are drawn first; the drift draws follow afterwards,
    # so the order in which random.random() is consumed stays fixed.
    revenue_shocks = []
    for _ in range(num_period):
        revenue_shocks.append(my_ndtri(random.random()))

    drift_shocks = []
    for shock in revenue_shocks:
        correlated_part = coefficient * shock
        independent_part = np.sqrt(1-coefficient**2) * my_ndtri(random.random())
        drift_shocks.append(correlated_part + independent_part)

    return {'rnd_revenue': revenue_shocks, 'rnd_drift': drift_shocks}

def vol_growth_rate(num_period, initial_volatility_growth_rate, mean_reversion_coeff_vol_growth_rate):
    """Return the per-period volatility of the growth rate as a list.

    Entry ``i`` equals ``initial_volatility_growth_rate * exp(-k * i)`` with
    ``k = mean_reversion_coeff_vol_growth_rate``, for ``i`` in
    ``0 .. num_period - 1``.
    """
    schedule = []
    for period in range(num_period):
        decay = np.exp(-mean_reversion_coeff_vol_growth_rate * period)
        schedule.append(initial_volatility_growth_rate*decay)
    return schedule

def vol_revenue(num_period, initial_volatility_revenue, mean_revension_coeff_vol_revenue, ultimate_long_term_volatility_growth_rate):
    """Return the per-period revenue volatility as a list.

    Entry ``i`` blends the initial level and the long-term level with the
    weight ``exp(-k * i)``, where ``k = mean_revension_coeff_vol_revenue``:
    ``initial * w + long_term * (1 - w)``.
    """
    schedule = []
    for period in range(num_period):
        from_initial = initial_volatility_revenue*np.exp(-mean_revension_coeff_vol_revenue*period)
        from_long_term = ultimate_long_term_volatility_growth_rate * (1-np.exp(-mean_revension_coeff_vol_revenue*period))
        schedule.append(from_initial+from_long_term)
    return schedule

def calc_drift(n_iter, ultimate_long_term_growth_rate, mean_reversion_coeff_revenue, adjusted_market_value_growth_rate, time_step, rnd_drift, vol_growth_rate, vol_revenue, drift):
    """Return the drift for period ``n_iter`` from the previous period's drift.

    The result is the sum of three terms:
      * the previous drift ``drift[n_iter-1]`` scaled by ``exp(-k * time_step)``,
      * ``(1 - exp(-k * time_step))`` times the long-term growth rate reduced by
        ``adjusted_market_value_growth_rate * vol_growth_rate[n_iter] / k``,
      * a random term ``sqrt((1 - exp(-2k * time_step)) / (2k))
        * vol_growth_rate[n_iter] * rnd_drift[n_iter]``,
    where ``k = mean_reversion_coeff_revenue``.

    ``vol_revenue`` is accepted but not used by this function.
    """
    kappa = mean_reversion_coeff_revenue

    # Share of the previous drift that carries over.
    carried = np.exp(-kappa * time_step)*drift[n_iter-1]

    # Pull towards the adjusted long-term growth rate.
    adjusted_target = ultimate_long_term_growth_rate-(adjusted_market_value_growth_rate*vol_growth_rate[n_iter]/kappa)
    pull = (1-np.exp(-kappa*time_step))*adjusted_target

    # Random shock for this period.
    shock_scale = np.sqrt((1-np.exp(-2*kappa*time_step))/(2*kappa))
    shock = shock_scale * vol_growth_rate[n_iter]*rnd_drift[n_iter]

    return carried+pull+shock

def calc_pl(
        num_period,
        initial_revenue,
        initial_expected_growth_rate_in_revenue,
        initial_surplus_cash_balance,
        initial_loss_carried_forward,
        ultimate_long_term_growth_rate,
        mean_reversion_coeff_revenue,
        adjusted_market_value_revenue,
        adjusted_market_value_growth_rate,
        risk_free_rate,
        tax_rate,
        ratio_cogs_on_revenue,
        fixed_component_otherexpense,
        ratio_flex_otherexpense,
        time_step,
        rnd_drift,
        rnd_revenue,
        default_line,
        vol_revenue,
        vol_growth_rate
):
    """Simulate one path of revenue, profit, cash and default status.

    Period 0 is built from the ``initial_*`` arguments; each later period is
    derived from the previous one using ``calc_drift`` and the supplied
    shock / volatility series.

    Args:
        num_period: number of periods to produce (must be >= 1).
        initial_revenue, initial_expected_growth_rate_in_revenue,
        initial_surplus_cash_balance, initial_loss_carried_forward:
            period-0 values of revenue, drift, cash and loss carry-forward.
        ultimate_long_term_growth_rate, mean_reversion_coeff_revenue,
        adjusted_market_value_growth_rate: forwarded to ``calc_drift``.
        adjusted_market_value_revenue: multiplied by the revenue volatility
            and subtracted from the drift in the revenue update.
        risk_free_rate: rate used for interest on the previous cash balance.
        tax_rate: rate applied to profit in excess of the loss carry-forward.
        ratio_cogs_on_revenue, ratio_flex_otherexpense: revenue-proportional
            cost ratios (added together).
        fixed_component_otherexpense: cost added every period.
        time_step: step size used in the drift, revenue and interest formulas.
        rnd_drift, rnd_revenue: per-period random shocks (indexable).
        default_line: floor applied to the cash balance.
        vol_revenue, vol_growth_rate: per-period volatilities (indexable).

    Returns:
        dict of lists, one entry per period, under the keys
        ``vol_of_revenue`` (the ``vol_revenue`` argument itself), ``drift``,
        ``revenue``, ``cost``, ``ebitda``, ``interest_income``,
        ``pl_before_tax``, ``tax``, ``net_income``,
        ``accumulated_loss_carriedforward``, ``surplus_cash``, ``default``
        (1 from the first period whose cash is <= 0 onwards) and
        ``default_flag`` (1 only in the period the default first occurs).

    Raises:
        ValueError: if ``num_period`` is smaller than 1.
    """
    if num_period < 1:
        raise ValueError("num_period must be at least 1")

    # Loop-invariant factors.
    variable_cost_ratio = ratio_cogs_on_revenue+ratio_flex_otherexpense
    # NOTE(review): the rate is divided by 4 and then multiplied by time_step;
    # presumably quarterly accrual — confirm against the units of time_step.
    interest_factor = np.exp(risk_free_rate/4 * time_step)-1
    sqrt_time_step = np.sqrt(time_step)

    # ---- period 0: initial state -------------------------------------
    drift = [initial_expected_growth_rate_in_revenue]
    revenue = [initial_revenue]
    cost = [variable_cost_ratio*initial_revenue+fixed_component_otherexpense]
    ebitda = [revenue[0]-cost[0]]
    surplus_cash = [max(initial_surplus_cash_balance, default_line)]
    interest_income = [interest_factor*surplus_cash[0]]
    pl_before_tax = [ebitda[0]+interest_income[0]]
    accumulated_loss_carriedforward = [initial_loss_carried_forward]
    # Tax is levied at tax_rate on profit exceeding the loss carry-forward,
    # using the same formula as every later period.
    tax = [-min(0, (accumulated_loss_carriedforward[0]-pl_before_tax[0])*tax_rate)]
    net_income = [pl_before_tax[0]-tax[0]]
    default = [0]
    default_flag = [0]

    # ---- periods 1 .. num_period-1 -----------------------------------
    for i in range(1, num_period):
        drift.append(calc_drift(
            n_iter=i,
            ultimate_long_term_growth_rate=ultimate_long_term_growth_rate,
            mean_reversion_coeff_revenue=mean_reversion_coeff_revenue,
            adjusted_market_value_growth_rate=adjusted_market_value_growth_rate,
            time_step=time_step,
            rnd_drift=rnd_drift,
            vol_growth_rate=vol_growth_rate,
            vol_revenue=vol_revenue,
            drift=drift
        ))

        # Revenue is zeroed once the previous period is in default.
        alive = 1-default[i-1]
        log_growth = (drift[i]-adjusted_market_value_revenue*vol_revenue[i]-(vol_revenue[i]**2 /2))*time_step+vol_revenue[i]*sqrt_time_step*rnd_revenue[i]
        revenue.append(alive*revenue[i-1]*np.exp(log_growth))

        # The fixed cost component is charged regardless of default status.
        cost.append(alive*variable_cost_ratio*revenue[i]+fixed_component_otherexpense)
        ebitda.append(revenue[i]-cost[i])
        interest_income.append(interest_factor*surplus_cash[i-1])
        pl_before_tax.append(ebitda[i]+interest_income[i])
        tax.append(-min(0, (accumulated_loss_carriedforward[i-1]-pl_before_tax[i])*tax_rate))
        net_income.append(pl_before_tax[i]-tax[i])
        accumulated_loss_carriedforward.append(max(accumulated_loss_carriedforward[i-1]-net_income[i], 0))
        surplus_cash.append(max(surplus_cash[i-1]+net_income[i], default_line))

        # `default` is sticky; `default_flag` marks only the first default period.
        if default[i-1]==1:
            default.append(1)
            default_flag.append(0)
        elif surplus_cash[i]<=0:
            default.append(1)
            default_flag.append(1)
        else:
            default.append(0)
            default_flag.append(0)

    return {'vol_of_revenue':vol_revenue,
            'drift':drift,
            'revenue':revenue,
            'cost':cost,
            'ebitda':ebitda,
            'interest_income':interest_income,
            'pl_before_tax':pl_before_tax,
            'tax':tax,
            'net_income':net_income,
            'accumulated_loss_carriedforward':accumulated_loss_carriedforward,
            'surplus_cash':surplus_cash,
            'default':default,
            'default_flag':default_flag}

def rov_process(
        initial_revenue,
        initial_loss_carried_forward,
        initial_surplus_cash_balance,
        initial_expected_growth_rate_in_revenue,
        initial_volatility_revenue,
        initial_volatility_growth_rate,
        ultimate_long_term_growth_rate,
        ultimate_long_term_volatility_growth_rate,
        tax_rate,
        risk_free_rate,
        mean_revension_coeff_revenue,
        mean_revension_coeff_vol_revenue,
        mean_revension_coeff_vol_growth_rate,
        ratio_cogs_on_revenue,
        fixed_component_otherexpense,
        ratio_flex_otherexpense,
        adjusted_market_value_revenue,
        adjusted_market_value_growth_rate,
        time_horizon,
        time_step,
        default_line,
        sample_pathways,
        multiple_evitda,
        quantity_stock,
        investment_amt,
        coefficient
):
    """Simulate a single path and value it.

    Draws random shocks, builds the two volatility schedules, runs
    ``calc_pl`` over ``time_horizon * 4 + 1`` periods and summarises the path.

    Most arguments are forwarded unchanged to ``gaussian_random``,
    ``vol_growth_rate``, ``vol_revenue`` and ``calc_pl``.  Used directly here:
        time_horizon: sets the number of periods and the discounting span.
        risk_free_rate: discount rate for the equity value.
        multiple_evitda: multiplier applied to the final-period EBITDA.
        quantity_stock: divisor turning equity value into a per-share price.
        investment_amt: subtracted from the share price.
        sample_pathways: not used here; accepted so the same argument dict
            can be splatted into this function.

    Returns:
        ``(rov_dict, output_dict)``.  ``rov_dict`` is the ``calc_pl`` result
        extended with ``rnd_revenue``, ``rnd_drift`` and
        ``vol_of_growth_rate``.  ``output_dict`` holds ``default_year``,
        ``surplus_cash_t``, ``terminal_value``, ``equity_value``,
        ``stock_price``, ``urgl_per_stock`` and ``renenue_ratio_after_5y``;
        when the path defaults, ``default_year`` is the first default period
        index divided by 4 and every other entry is 0.
        ``renenue_ratio_after_5y`` is NaN when the path has fewer than 21
        periods.
    """
    num_period = time_horizon*4 +1
    rnd_dict = gaussian_random(num_period=num_period, coefficient=coefficient)
    vol_growth_rate_list = vol_growth_rate(
        num_period=num_period,
        initial_volatility_growth_rate=initial_volatility_growth_rate,
        mean_reversion_coeff_vol_growth_rate=mean_revension_coeff_vol_growth_rate,
    )
    vol_revenue_list = vol_revenue(
        num_period=num_period,
        initial_volatility_revenue=initial_volatility_revenue,
        mean_revension_coeff_vol_revenue=mean_revension_coeff_vol_revenue,
        ultimate_long_term_volatility_growth_rate=ultimate_long_term_volatility_growth_rate,
    )
    rov_dict = calc_pl(
        num_period=num_period,
        initial_revenue=initial_revenue,
        initial_expected_growth_rate_in_revenue=initial_expected_growth_rate_in_revenue,
        initial_surplus_cash_balance=initial_surplus_cash_balance,
        initial_loss_carried_forward=initial_loss_carried_forward,
        ultimate_long_term_growth_rate=ultimate_long_term_growth_rate,
        mean_reversion_coeff_revenue=mean_revension_coeff_revenue,
        adjusted_market_value_revenue=adjusted_market_value_revenue,
        adjusted_market_value_growth_rate=adjusted_market_value_growth_rate,
        risk_free_rate=risk_free_rate,
        tax_rate=tax_rate,
        ratio_cogs_on_revenue=ratio_cogs_on_revenue,
        fixed_component_otherexpense=fixed_component_otherexpense,
        ratio_flex_otherexpense=ratio_flex_otherexpense,
        time_step=time_step,
        rnd_drift=rnd_dict['rnd_drift'],
        rnd_revenue=rnd_dict['rnd_revenue'],
        default_line=default_line,
        vol_revenue=vol_revenue_list,
        vol_growth_rate=vol_growth_rate_list
    )

    # Index of the first defaulted period, or None when the path survives.
    first_default = next(
        (i for i, flag in enumerate(rov_dict['default']) if flag==1),
        None,
    )

    if first_default is not None:
        output_dict = {
            'default_year':first_default/4,
            'surplus_cash_t':0,
            'terminal_value':0,
            'equity_value':0,
            'stock_price':0,
            'urgl_per_stock':0,
            'renenue_ratio_after_5y':0
        }
    else:
        # Valuation is computed once, from the final period of the path.
        surplus_cash_t = rov_dict['surplus_cash'][-1]
        terminal_value = rov_dict['ebitda'][-1]*multiple_evitda
        equity_value = (surplus_cash_t+terminal_value)*np.exp(-risk_free_rate*time_horizon)
        stock_price = equity_value/quantity_stock
        urgl_per_stock = stock_price-investment_amt

        # Period 4*5 is the 5-year mark; paths shorter than that have no
        # such period, so the ratio is reported as NaN instead of failing.
        revenue_path = rov_dict['revenue']
        five_year_index = 4*5
        if len(revenue_path) > five_year_index:
            renenue_ratio_after_5y = revenue_path[five_year_index]/revenue_path[0]-1
        else:
            renenue_ratio_after_5y = float('nan')

        output_dict = {
            'default_year':0,
            'surplus_cash_t':surplus_cash_t,
            'terminal_value':terminal_value,
            'equity_value':equity_value,
            'stock_price':stock_price,
            'urgl_per_stock':urgl_per_stock,
            'renenue_ratio_after_5y':renenue_ratio_after_5y
        }

    rov_dict['rnd_revenue'] = rnd_dict['rnd_revenue']
    rov_dict['rnd_drift'] = rnd_dict['rnd_drift']
    rov_dict['vol_of_growth_rate'] = vol_growth_rate_list
    return rov_dict, output_dict

def simulate_rov(args_dict, n_jobs):
    """Run ``args_dict['sample_pathways']`` independent paths in parallel.

    Each path calls ``rov_process(**args_dict)``; the work is distributed
    with joblib using ``n_jobs`` workers.

    Returns:
        Six tuples, each with one entry per path: equity value, stock price,
        ``urgl_per_stock``, the per-period default flags, the revenue series
        and the drift series.
    """
    def one_pathway(args_dict):
        # Keep only the fields that are aggregated across paths.
        path, summary = rov_process(**args_dict)
        return (
            summary['equity_value'],
            summary['stock_price'],
            summary['urgl_per_stock'],
            path['default_flag'],
            path['revenue'],
            path['drift'],
        )

    runner = Parallel(n_jobs=n_jobs, verbose=1)
    n_paths = args_dict['sample_pathways']
    result = runner(delayed(one_pathway)(args_dict) for _ in range(n_paths))

    # Transpose the list of per-path tuples into per-field tuples.
    columns = zip(*result)
    equity_value, stock_price, urgl_per_stock, default_flag, revenue, drift = columns

    return equity_value, stock_price, urgl_per_stock, default_flag, revenue, drift


In [None]:
# Keyword arguments for rov_process; simulate_rov splats this dict into it
# and reads 'sample_pathways' for the number of paths to run.
args = {
    # Period-0 state of the simulated path.
    'initial_revenue':356,
    'initial_loss_carried_forward':559,
    'initial_surplus_cash_balance':906,
    'initial_expected_growth_rate_in_revenue':0.11,
    'initial_volatility_revenue':0.1,
    'initial_volatility_growth_rate':0.03,
    # Long-term levels.
    'ultimate_long_term_growth_rate':0.015,
    'ultimate_long_term_volatility_growth_rate':0.05,
    'tax_rate':0.35,
    'risk_free_rate':0.05,
    # Decay coefficients used in the exp(-k * ...) terms.
    'mean_revension_coeff_revenue':0.07,
    'mean_revension_coeff_vol_revenue':0.07,
    'mean_revension_coeff_vol_growth_rate':0.07,
    # Cost structure: revenue-proportional ratios plus a fixed component.
    'ratio_cogs_on_revenue':0.75,
    'fixed_component_otherexpense':75,
    'ratio_flex_otherexpense':0.19,
    'adjusted_market_value_revenue':0.01,
    'adjusted_market_value_growth_rate':0,
    # rov_process simulates time_horizon*4 + 1 periods.
    'time_horizon':25,
    'time_step':1,
    # Floor applied to the cash balance in calc_pl.
    'default_line':0,
    'sample_pathways':10000,
    # Valuation inputs used by rov_process.
    'multiple_evitda':10,
    'quantity_stock':70,
    'investment_amt':10,
    # Weight of the revenue shock inside the drift shock (gaussian_random).
    'coefficient':0
}

In [None]:
# Run the simulation; each name receives a tuple with one entry per path.
# NOTE(review): n_jobs=30 requests 30 workers, while os.cpu_count() in a
# later cell reports 2 for this runtime — confirm this is intended.
equity_value, stock_price, urgl_per_stock, default_flag, revenue, drift = simulate_rov(args, n_jobs=30)

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done 140 tasks      | elapsed:   25.9s
[Parallel(n_jobs=30)]: Done 390 tasks      | elapsed:   37.6s
[Parallel(n_jobs=30)]: Done 740 tasks      | elapsed:   52.6s
[Parallel(n_jobs=30)]: Done 1190 tasks      | elapsed:  1.2min
[Parallel(n_jobs=30)]: Done 1740 tasks      | elapsed:  1.6min
[Parallel(n_jobs=30)]: Done 2390 tasks      | elapsed:  2.0min
[Parallel(n_jobs=30)]: Done 3140 tasks      | elapsed:  2.6min
[Parallel(n_jobs=30)]: Done 3990 tasks      | elapsed:  3.2min
[Parallel(n_jobs=30)]: Done 4940 tasks      | elapsed:  3.8min
[Parallel(n_jobs=30)]: Done 5990 tasks      | elapsed:  4.5min
[Parallel(n_jobs=30)]: Done 7140 tasks      | elapsed:  5.3min
[Parallel(n_jobs=30)]: Done 8390 tasks      | elapsed:  6.2min
[Parallel(n_jobs=30)]: Done 9740 tasks      | elapsed:  7.2min
[Parallel(n_jobs=30)]: Done 10000 out of 10000 | elapsed:  7.3min finished


In [None]:
import os
# Show how many logical CPUs this runtime reports.
os.cpu_count()

2

In [None]:
revenue

In [None]:
import multiprocessing
# CPU count as reported by multiprocessing, kept for display in the next cell.
num_cores = multiprocessing.cpu_count()

In [None]:
num_cores

2

In [1]:
import pandas as pd
import re

def replace_address_format(address):
    """Rewrite Japanese lot-number notation as hyphen-separated digits.

    Three substitutions are applied in order:
      1. digit runs joined by "の" (e.g. ``1の2の3``) become ``1-2-3``;
      2. ``N番地のM`` and ``N番地M`` become ``N-M``;
      3. any remaining ``N番地`` becomes ``N``.
    """
    def _hyphenate(match):
        # Swap every "の" inside the matched digit chain for a hyphen.
        return match.group(0).replace('の', '-')

    chained = re.sub(r'(\d+)(の(\d+))+', _hyphenate, address)
    paired = re.sub(r'(\d+)番地の?(\d+)', r'\1-\2', chained)
    return re.sub(r'(\d+)番地', r'\1', paired)

# テスト
# Sample addresses covering each notation handled by replace_address_format.
df = pd.DataFrame({'address': ["123の456", "789番地の0", "123の456の789", "123の456 789番地0", "1の2の3の4の5", "6番地7", "8番地の9", "10番地"]})

# Apply the function to every row with apply().
df['address'] = df['address'].apply(replace_address_format)

print(df)


         address
0        123-456
1          789-0
2    123-456-789
3  123-456 789-0
4      1-2-3-4-5
5            6-7
6            8-9
7             10


In [11]:
# Single-character Roman numeral glyphs (upper- and lower-case) and their values.
_WIDE_ROMAN_VALUES = {
    'Ⅰ': 1, 'Ⅱ': 2, 'Ⅲ': 3, 'Ⅳ': 4, 'Ⅴ': 5,
    'Ⅵ': 6, 'Ⅶ': 7, 'Ⅷ': 8, 'Ⅸ': 9, 'Ⅹ': 10,
    'Ⅺ': 11, 'Ⅻ': 12, 'ⅰ': 1, 'ⅱ': 2, 'ⅲ': 3, 'ⅳ': 4,
    'ⅴ': 5, 'ⅵ': 6, 'ⅶ': 7, 'ⅷ': 8, 'ⅸ': 9,
    'ⅹ': 10, 'ⅺ': 11, 'ⅻ': 12,
}

# Built from the mapping so the pattern and the table cannot drift apart.
_WIDE_ROMAN_PATTERN = re.compile('[' + ''.join(_WIDE_ROMAN_VALUES) + ']+')


def wide_roman_to_int(s: str) -> int:
    """Return the sum of the values of the Roman numeral glyphs in ``s``.

    Each character is looked up individually and the values are added, so
    ``"ⅩⅡ"`` gives 12.  The sum is purely additive; no subtractive notation
    is interpreted across characters.

    Raises:
        KeyError: if ``s`` contains a character that is not in the table.
    """
    return sum(_WIDE_ROMAN_VALUES[ch] for ch in s)


def convert_roman_in_address(address: str) -> str:
    """Replace each run of Roman numeral glyphs in ``address`` with digits.

    Both upper-case (Ⅰ–Ⅻ) and lower-case (ⅰ–ⅻ) glyphs are recognised; every
    contiguous run is converted with ``wide_roman_to_int``.
    """
    return _WIDE_ROMAN_PATTERN.sub(
        lambda m: str(wide_roman_to_int(m.group(0))), address
    )

# Sample addresses with Roman numeral glyphs mixed into other notations.
df = pd.DataFrame({'address': ["123の456Ⅰ", "Ⅸ789番地の0Ⅸ", "123の456の789Ⅻ", "123の456 789番地0a", "1の2の3の4の5d", "6番地7e", "8番地の9f", "10番地ⅩⅡ"]})

df['address'] = df['address'].apply(convert_roman_in_address)

print(df)


           address
0         123の4561
1        9789番地の09
2    123の456の78912
3  123の456 789番地0a
4       1の2の3の4の5d
5            6番地7e
6           8番地の9f
7           10番地12


In [5]:
# Normalise letter case and character variants before further matching.
df['address'] = df['address'].str.upper()
df['address'] = df['address'].str.replace('ヶ', 'ケ', regex=False)
# The pattern is an alternation of middle-dot variants, so it must be treated
# as a regular expression; recent pandas versions default to literal matching.
df['address'] = df['address'].str.replace('・|･|·|•', '', regex=True)

In [16]:
def kanji_to_int(s: str) -> int:
    """Convert a kanji numeral string (一–九, 十, 百, 千, 万) to an int.

    Handles the usual unit notation (``二万三千四百五十六`` -> 23456),
    consecutive units (``百十`` -> 110, ``千百`` -> 1100) and a multi-unit
    count in front of 万 (``十二万`` -> 120000).  A run of plain digits
    without units is read positionally (``一二三`` -> 123).

    An empty string gives 0.

    Raises:
        KeyError: if ``s`` contains a character outside the supported set.
    """
    digit_values = {
        '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
        '六': 6, '七': 7, '八': 8, '九': 9,
    }
    small_units = {'十': 10, '百': 100, '千': 1000}

    total = 0      # value of completed 万 groups
    section = 0    # value accumulated below the current 万 boundary
    pending = 0    # digits read but not yet attached to a unit

    for c in s:
        if c in digit_values:
            pending = pending * 10 + digit_values[c]
        elif c in small_units:
            # A unit with no digit in front of it counts once (十 == 10).
            section += (pending or 1) * small_units[c]
            pending = 0
        elif c == '万':
            section += pending
            # 万 scales everything read since the previous 万 (or the start).
            total += (section or 1) * 10000
            section = 0
            pending = 0
        else:
            raise KeyError(c)

    return total + section + pending

# テスト用データフレームの作成
df = pd.DataFrame({
    'address': ['一番地の一', '十番地の二十', '百番地の三百四十', '千番地の四千五百六十', '二万三千四百五十六番地の七']
})

df['address'] = df['address'].apply(lambda x: re.sub(r'([一二三四五六七八九十百千万]+)', lambda m: str(kanji_to_int(m.group(1))), x))


In [17]:
df

Unnamed: 0,address
0,1番地の1
1,10番地の20
2,100番地の340
3,1000番地の4560
4,23456番地の7


In [20]:
def convert_english_numbers_to_digits(s: str) -> str:
    """Replace upper-case English number words (ONE–TEN) with digits.

    A word is replaced only when it is not directly adjacent to another
    ASCII letter, so ``銀座ONE`` becomes ``銀座1`` while words that merely
    contain a number word (``HEIGHTS``, ``STONE``) are left untouched.
    """
    import re  # local import so the function does not depend on another cell

    # Mapping from English number words to Arabic numerals.
    number_mapping = {
        "ONE": 1, "TWO": 2, "THREE": 3, "FOUR": 4, "FIVE": 5,
        "SIX": 6, "SEVEN": 7, "EIGHT": 8, "NINE": 9, "TEN": 10
    }

    # ASCII-letter lookarounds are used instead of \b because kanji and kana
    # count as word characters and would block a match right after them.
    pattern = r'(?<![A-Za-z])(?:' + '|'.join(number_mapping) + r')(?![A-Za-z])'

    return re.sub(pattern, lambda m: str(number_mapping[m.group(0)]), s)

# Single-row check: an English number word directly after kanji.
df = pd.DataFrame({'address': ["銀座ONE"]})
df['address'] = df['address'].apply(convert_english_numbers_to_digits)
df

Unnamed: 0,address
0,銀座1


In [None]:
# Remove the company-type markers; a raw string keeps the escaped ASCII
# parentheses from being read as invalid string escape sequences.
df['address'] = df['address'].str.replace(r'㈱|（株）|\(株\)|株式会社', '', regex=True)

In [21]:
!pip install pykakasi

Collecting pykakasi
  Downloading pykakasi-2.2.1-py3-none-any.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jaconv (from pykakasi)
  Downloading jaconv-0.3.4.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deprecated (from pykakasi)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Building wheels for collected packages: jaconv
  Building wheel for jaconv (setup.py) ... [?25l[?25hdone
  Created wheel for jaconv: filename=jaconv-0.3.4-py3-none-any.whl size=16416 sha256=3f597cc1de0b9e8ad893c5eb36f5955a077223a61b4d4927fbc63b03b0912580
  Stored in directory: /root/.cache/pip/wheels/46/8f/2e/a730bf1fca05b33e532d5d91dabdf406c9b718ec85b01b1b54
Successfully built jaconv
Installing collected packages: jaconv, deprecated, pykakasi
Successfully installed deprecated-1.2.14 jaconv-0.3.4 pykakasi-2.2.1


In [22]:
import pykakasi

In [28]:
# Convert a sample address and concatenate the 'kana' reading of each
# segment returned by the converter.
kks = pykakasi.kakasi()
results = kks.convert("東京都品川区広町やまだマンションA棟")
kana_add = ""
for result in results:
  kana_add += result['kana']
kana_add

'トウキョウトシナガワクヒロマチヤマダマンションAトウ'

In [27]:
result

[{'orig': '東京都',
  'hira': 'とうきょうと',
  'kana': 'トウキョウト',
  'hepburn': 'toukyouto',
  'kunrei': 'toukyouto',
  'passport': 'tokyouto'},
 {'orig': '品川区',
  'hira': 'しながわく',
  'kana': 'シナガワク',
  'hepburn': 'shinagawaku',
  'kunrei': 'sinagawaku',
  'passport': 'shinagawaku'},
 {'orig': '広町',
  'hira': 'ひろまち',
  'kana': 'ヒロマチ',
  'hepburn': 'hiromachi',
  'kunrei': 'hiromati',
  'passport': 'hiromachi'},
 {'orig': 'やまだ',
  'hira': 'やまだ',
  'kana': 'ヤマダ',
  'hepburn': 'yamada',
  'kunrei': 'yamada',
  'passport': 'yamada'},
 {'orig': 'マンション',
  'hira': 'まんしょん',
  'kana': 'マンション',
  'hepburn': 'manshon',
  'kunrei': 'mansyon',
  'passport': 'manshon'},
 {'orig': 'A',
  'hira': 'A',
  'kana': 'A',
  'hepburn': 'A',
  'kunrei': 'A',
  'passport': 'A'},
 {'orig': '棟',
  'hira': 'とう',
  'kana': 'トウ',
  'hepburn': 'tou',
  'kunrei': 'tou',
  'passport': 'to'}]