In [35]:
from pathlib import Path

import jalali_pandas  # imported for the `.jalali` accessor used in later cells; the name itself is never referenced
import numpy as np
import pandas as pd
import plotly.express as px

In [36]:
# Open the workbook once and reuse the handle for every sheet read, so the
# file is not parsed a second time just to load the first sheet.
excel_file = pd.ExcelFile("data.xlsx")
# First sheet of the workbook (read_excel's default sheet_name=0).
# NOTE(review): `data` is not used by any cell visible in this section — confirm it is still needed.
data = pd.read_excel(excel_file)

In [37]:
# Show every sheet name in the workbook; later cells pick sheets by substring match on these names.
excel_file.sheet_names

['Table2',
 'Sheet2',
 'قیمت دستور کار مهر',
 'قیمت دستورکار شهریورماه',
 'بانک ماشین آلات',
 'Sheet3',
 'تعمیرات2.4 الی 2.6',
 'تعمیرات2.2 الی 2.4',
 'تعمیرات12.1 الی 2.2',
 'تعمیرات10.1 الی 12.1',
 'تعمیرات8.1 الی 10.1',
 'تعمیرات 6.31 الی8.1',
 'Sheet1',
 'مرداد -قیمت',
 'تیر - قیمت',
 'خرداد-قیمت',
 'اردیبهشت-قیمت',
 'فروردین-قیمت',
 'اسفند-قیمت',
 'بهمن-قیمت',
 'دی قیمت',
 'آذر-قیمت',
 'آبان - قیمت',
 'مهر1403-قیمت',
 'تعمیرات2.6 الی 3.8',
 'بانک تعمیرات']

### Tamirat Data Processing

In [38]:
# Select the repair sheets: those whose name contains "الی".
# The needle is Persian text, which has no upper/lower case, so no case
# normalisation is applied (same approach as the price-sheet filter).
tamirat_list_name = [name for name in excel_file.sheet_names if "الی" in name]

In [39]:
# Load every repair sheet, tag each row with its source sheet name in column
# 'i', then concatenate ONCE. Growing a frame with concat inside the loop
# re-copies all previously loaded rows (and re-runs fillna) on each iteration.
tamirat_frames = []
for name in tamirat_list_name:
    df = pd.read_excel(excel_file, sheet_name=name)
    df['i'] = name
    tamirat_frames.append(df)

# The per-sheet index is kept on purpose (no ignore_index): the next cell
# addresses the leading rows of every sheet by their labels 0, 1, 2.
# Missing cells are replaced with '-' exactly as before.
# pd.concat raises on an empty list, so fall back to an empty frame when no
# sheet matched.
tamirat_data = pd.concat(tamirat_frames).fillna('-') if tamirat_frames else pd.DataFrame()

In [40]:
# Promote the row at position 2 to column labels, then remove the rows
# labelled 0, 1 and 2. Sheet indexes were not reset when concatenating, so
# the label-based drop removes those three rows from every sheet.
# NOTE(review): the sheet-name column 'i' is relabelled too — it takes the
# value found in that header row. Not idempotent: run once per kernel.
tamirat_header = tamirat_data.iloc[2].to_list()
tamirat_data.columns = tamirat_header
tamirat_data = tamirat_data.drop(index=[0, 1, 2])

In [41]:
# Bar chart of how many repair rows fall under each 'Main system' value.
main_system_counts = tamirat_data['Main system'].value_counts()
px.bar(main_system_counts)

In [42]:
# Export the number of repair rows per 'sub system' value to Excel.
# to_excel does not create missing folders, so make sure the output
# directory exists first (keeps Restart & Run All working on a fresh checkout).
subsystem_counts_path = Path('cleaned_data/count_subsystem.xlsx')
subsystem_counts_path.parent.mkdir(parents=True, exist_ok=True)
tamirat_data['sub system'].value_counts().to_frame().to_excel(subsystem_counts_path)

### Price and Open-Close Analysis

In [43]:
# Price sheets are the ones whose name contains "قیمت".
price_sheetname_list = [
    sheet_name
    for sheet_name in excel_file.sheet_names
    if "قیمت" in sheet_name
]

In [44]:
# Load every price sheet, tag each row with its source sheet name in column
# 'i', then concatenate ONCE. Growing a frame with concat inside the loop
# re-copies all previously loaded rows (and re-runs fillna) on each iteration.
price_frames = []
for name in price_sheetname_list:
    df = pd.read_excel(excel_file, sheet_name=name)
    df['i'] = name
    price_frames.append(df)

# The per-sheet index is kept on purpose (no ignore_index): the next cell
# addresses the leading rows of every sheet by their labels 0, 1, 2.
# Missing cells are replaced with '-' exactly as before.
# pd.concat raises on an empty list, so fall back to an empty frame when no
# sheet matched.
price_data = pd.concat(price_frames).fillna('-') if price_frames else pd.DataFrame()

In [45]:
# Promote the row at position 2 to column labels, then remove the rows
# labelled 0, 1 and 2. Sheet indexes were not reset when concatenating, so
# the label-based drop removes those three rows from every sheet.
# NOTE(review): the sheet-name column 'i' is relabelled too — it takes the
# value found in that header row. Not idempotent: run once per kernel.
price_header = price_data.iloc[2].to_list()
price_data.columns = price_header
price_data = price_data.drop(index=[0, 1, 2])

In [46]:
# A 'finish' value of 0 is replaced by the row's 'start' value, which makes
# the computed duration 0 days for those rows.
# NOTE(review): blank cells were filled with '-' at load time, so they would
# not match the == 0 test — confirm no blanks reach the date parser.
finish_is_zero = price_data['finish'] == 0
price_data['finish'] = np.where(finish_is_zero, price_data['start'], price_data['finish'])

# Parse the Jalali "YYYY/MM/DD" strings and convert them to Gregorian dates.
start_jalali = price_data['start'].jalali.parse_jalali("%Y/%m/%d")
price_data['start-g'] = start_jalali.jalali.to_gregorian()

finish_jalali = price_data['finish'].jalali.parse_jalali("%Y/%m/%d")
price_data['finish-g'] = finish_jalali.jalali.to_gregorian()

# Whole days between the Gregorian start and finish dates.
elapsed = price_data['finish-g'] - price_data['start-g']
price_data['duration'] = elapsed.dt.days

In [47]:
# Ten rows with the largest 'duration', longest first.
price_by_duration = price_data.sort_values(by='duration', ascending=False)
price_by_duration.head(10)

Unnamed: 0,order number,prise,start,finish,قیمت دستور کار مهر,start-g,finish-g,duration
4119,1397852,4400000,1403/08/12,1404/09/20,آبان - قیمت,2024-11-02,2025-12-11,404
6601,1393375,34400000,1403/12/04,1404/12/28,اسفند-قیمت,2025-02-22,2026-03-19,390
3859,1395179,35000000,1403/08/21,1404/09/14,آبان - قیمت,2024-11-11,2025-12-05,389
6461,1393895,10700000,1403/11/21,1404/11/28,بهمن-قیمت,2025-02-09,2026-02-17,373
785,1452021,450000000,1404/05/29,1405/06/03,مرداد -قیمت,2025-08-20,2026-08-25,370
5024,1398264,1600000,1403/12/07,1404/12/09,اسفند-قیمت,2025-02-25,2026-02-28,368
5857,1323135,376000000,1403/11/30,1404/12/01,بهمن-قیمت,2025-02-18,2026-02-20,367
193,1396782,73500000,1403/12/05,1404/12/05,اسفند-قیمت,2025-02-23,2026-02-24,366
2306,1396382,46400000,1403/12/29,1404/12/29,اسفند-قیمت,2025-03-19,2026-03-20,366
9587,1461443,96000000,1403/09/12,1404/09/12,آذر-قیمت,2024-12-02,2025-12-03,366


### Price and Tamirat Lookup

In [93]:
# Inner-join repair rows with price rows on the shared 'order number' key.
merged_tamirat_price = tamirat_data.merge(price_data, how='inner', on='order number')

# Jalali quarter of each repair, taken from the "YYYY/MM/DD" 'Repair date' string.
repair_dates_jalali = merged_tamirat_price["Repair date"].jalali.parse_jalali("%Y/%m/%d")
merged_tamirat_price['quarter'] = repair_dates_jalali.jalali.quarter

In [95]:
# Per 'Main system': mean duration, mean price and number of 'Work code'
# entries, ordered by mean price (highest first).
main_system_stats = (
    merged_tamirat_price
    .groupby('Main system')
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'})
    .reset_index()
    .sort_values('prise', ascending=False)
)

# Mean duration per system; marker size is mean price, colour is the row count.
px.scatter(
    main_system_stats,
    x='Main system',
    y='duration',
    size='prise',
    color='Work code',
)

In [96]:
# Per (quarter, 'Main system'): mean duration, mean price and number of
# 'Work code' entries, ordered by mean price (highest first).
quarter_system_summary = (
    merged_tamirat_price
    .groupby(['quarter', 'Main system'])
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'})
    .reset_index()
    .sort_values('prise', ascending=False)
)
quarter_system_summary

Unnamed: 0,quarter,Main system,duration,prise,Work code
4,1,بازسازی,16.346154,3.890731e+09,26
16,1,سیستم وکیوم دستگاه جاروب,31.500000,3.666250e+09,2
40,2,سیستم نمک پاش و برفروب,26.974359,2.788828e+09,39
70,3,سیستم وکیوم دستگاه جاروب,0.250000,1.763281e+09,8
50,2,نمکپاش,13.285714,1.744379e+09,7
...,...,...,...,...,...
5,1,تعویض روغن,18.035088,8.459000e+07,570
88,4,سرویس و نگهداری,20.169683,7.849269e+07,442
101,4,معاینه فنی,14.117647,7.454206e+07,34
47,2,معاینه فنی,19.100000,7.058000e+07,10


In [97]:
# Per quarter: mean duration, mean price and number of 'Work code' entries,
# ordered by mean duration (longest first).
quarter_summary = (
    merged_tamirat_price
    .groupby(['quarter'])
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'})
    .reset_index()
    .sort_values('duration', ascending=False)
)
quarter_summary

Unnamed: 0,quarter,duration,prise,Work code
3,4,22.904891,338120500.0,18442
0,1,17.347365,424518000.0,22259
1,2,13.848077,479979400.0,22123
2,3,13.212308,336615600.0,42867


In [98]:
# Same (quarter, 'Main system') aggregation as the table above, recomputed
# here so the cell runs on its own.
quarter_system_plot_data = (
    merged_tamirat_price
    .groupby(['quarter', 'Main system'])
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'})
    .reset_index()
    .sort_values('prise', ascending=False)
)

# Mean price per system; marker size is mean duration, colour is the quarter.
px.scatter(
    quarter_system_plot_data,
    x='Main system',
    y='prise',
    size='duration',
    color='quarter',
)

### Machines Properties

In [99]:
# Read the machine-bank sheet through the already-open workbook handle
# (`excel_file`), as the other sheet reads do, instead of re-opening and
# re-parsing data.xlsx from its path.
machine_bank = pd.read_excel(excel_file, sheet_name='بانک ماشین آلات')

In [100]:
# Attach machine-bank attributes to each repair/price row via 'Machine code'
# (inner join, pandas' default for merge).
merged_tamirat_price_bank = pd.merge(
    merged_tamirat_price,
    machine_bank,
    on='Machine code',
)

In [101]:
# Bar chart of how many merged rows fall under each 'UnitGroup' value.
unit_group_counts = merged_tamirat_price_bank['UnitGroup'].value_counts()
px.bar(unit_group_counts)

In [102]:
px.scatter(
    merged_tamirat_price_bank.groupby(['UnitGroup']).agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'}).reset_index().sort_values('prise', ascending=False),
    x='UnitGroup',
    y='prise', 
    size='duration',
    color='Work code'
    )

In [103]:
# Sanity check: number of merged rows whose 'duration' is missing.
duration_is_missing = merged_tamirat_price_bank['duration'].isna()
duration_is_missing.sum()

0

In [None]:
# Per 'CompanyName': mean duration, mean price and number of 'Work code'
# entries, ordered by mean price (highest first).
company_summary = (
    merged_tamirat_price_bank
    .groupby(['CompanyName'])
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count'})
    .reset_index()
    .sort_values('prise', ascending=False)
)
company_summary

In [122]:
# Keep only rows that have a BuildDate and store it as an integer.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignment below does not write into a slice
# (avoids pandas' SettingWithCopyWarning).
has_build_date = ~merged_tamirat_price_bank['BuildDate'].isna()
merged_tamirat_price_bank = merged_tamirat_price_bank[has_build_date].copy()
merged_tamirat_price_bank['BuildDate'] = merged_tamirat_price_bank['BuildDate'].astype(int)

# Per 'CompanyName': mean duration, mean price, number of 'Work code' entries
# and mean BuildDate, ordered by mean BuildDate (highest first).
company_build_stats = (
    merged_tamirat_price_bank
    .groupby(['CompanyName'])
    .agg({'duration': 'mean', 'prise': 'mean', 'Work code': 'count', 'BuildDate': 'mean'})
    .reset_index()
    .sort_values('BuildDate', ascending=False)
)

# Mean price per company; colour is the mean BuildDate, marker size is the row count.
px.scatter(
    company_build_stats,
    x='CompanyName',
    y='prise',
    color='BuildDate',
    size='Work code',
)