In [69]:
import json
import pandas as pd
import numpy as np
import os
import re
from dax_extract import read_data_model_schema
from pathlib import Path

class MetaData():
    """Export parts of a data-model schema to Excel workbooks.

    The schema is a nested dict whose ``data['model']['tables']`` entries may
    carry ``measures`` and ``columns`` lists and whose
    ``data['model']['relationships']`` is a list of relationship dicts.  Each
    ``extract_*`` method writes one numbered ``.xlsx`` file into the current
    working directory.

    NOTE(review): ``pbit_path`` is presumably a Power BI template file read by
    ``dax_extract.read_data_model_schema`` -- confirm against that package.
    """

    def __init__(self, pbit_path, schema_path="No path"):
        """Load the schema from ``pbit_path``, falling back to ``schema_path``.

        Args:
            pbit_path: Path handed to ``read_data_model_schema``; its file
                name also seeds the output file names.
            schema_path: JSON text file (UTF-8 or UTF-16) used when reading
                ``pbit_path`` fails.

        Raises:
            OSError: If the fallback schema file cannot be opened.
            ValueError: If the fallback file is not valid JSON in either
                encoding.
        """
        self.file = pbit_path
        self.path = Path(pbit_path)
        self.schema = schema_path
        # Kept for backward compatibility; numbering now uses its own pattern.
        self.pattern = r'[0-9]+'
        self.textpat = r'[a-zA-Z]+'
        # Snapshot kept for backward compatibility; the extract methods list
        # the directory again so repeated calls never reuse a file name.
        self.items = os.listdir()
        try:
            self.data = read_data_model_schema(self.path)
        except Exception:
            # Any failure of the primary reader triggers the JSON fallback;
            # an error raised here keeps the reader's error as its context.
            self.data = self._load_schema_file(self.schema)

    @staticmethod
    def _load_schema_file(schema_path):
        """Return the JSON document in ``schema_path`` (UTF-8, then UTF-16)."""
        try:
            with open(schema_path, "r", encoding='utf-8') as handle:
                return json.load(handle)
        except ValueError:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError
            # subclasses; either one means the file is not UTF-8 JSON.
            with open(schema_path, "r", encoding='utf-16') as handle:
                return json.load(handle)

    @staticmethod
    def _expression_text(item):
        """Return the stripped expression of ``item`` or ``"No expression"``."""
        if 'expression' not in item:
            return "No expression"
        expression = item['expression']
        if isinstance(expression, list):
            # Multi-part expressions are stored as a list of strings.
            expression = " ".join(expression)
        return expression.strip()

    def _output_prefix(self):
        """Return the first run of letters in the source file's name."""
        source = Path(self.file)
        # Only the final path component is searched so that a directory part
        # (e.g. "data/Weave.pbit") does not become the prefix.
        letters = re.findall(self.textpat, source.name)
        if letters:
            return str(letters[0])
        return source.stem

    def _next_output_path(self, kind, existing=None):
        """Return ``<prefix>_<kind><n>.xlsx`` with the first unused ``n``.

        Args:
            kind: Output family, e.g. ``"measures"``.
            existing: File names to check against; defaults to the current
                working directory listing.
        """
        if existing is None:
            existing = os.listdir()
        stem = self._output_prefix() + "_" + kind
        numbered = re.compile(re.escape(stem) + r"([0-9]+)\.xlsx")
        # Only files this class itself would have written are considered, so
        # unrelated names without a number cannot break the numbering.
        used = [int(hit.group(1)) for hit in map(numbered.fullmatch, existing) if hit]
        next_index = max(used) + 1 if used else 0
        return stem + str(next_index) + ".xlsx"

    def _tables(self):
        """Return the list of table dicts of the model (empty if absent)."""
        return self.data['model'].get('tables', [])

    def _measures_frame(self):
        """Return one row per measure: table index/name, name, expression."""
        rows = []
        for table_index, table in enumerate(self._tables()):
            for measure in table.get('measures', []):
                rows.append({
                    "table Number": table_index,
                    "table Name": table['name'],
                    "Measure Name": measure['name'],
                    "Measure Expression": self._expression_text(measure),
                })
        return pd.DataFrame(
            rows,
            columns=["table Number", "table Name", "Measure Name", "Measure Expression"],
        )

    def _relationships_frame(self):
        """Return one row per relationship of the model."""
        rows = []
        for relation in self.data['model'].get('relationships', []):
            rows.append({
                "From Table": relation['fromTable'],
                "From Column": relation['fromColumn'],
                "To Table": relation['toTable'],
                "To Column": relation['toColumn'],
                "State": relation.get('state', "No State"),
            })
        return pd.DataFrame(
            rows,
            columns=["From Table", "From Column", "To Table", "To Column", "State"],
        )

    def _calculated_columns_frame(self):
        """Return one row per column whose ``type`` is ``'calculated'``."""
        rows = []
        for table_index, table in enumerate(self._tables()):
            for column in table.get('columns', []):
                if column.get('type') != 'calculated':
                    continue
                rows.append({
                    "table no": table_index,
                    "Table Name": table['name'],
                    "name": column['name'],
                    "expression": self._expression_text(column),
                })
        return pd.DataFrame(
            rows,
            columns=["table no", "Table Name", "name", "expression"],
        )

    def extract_measures(self):
        """Write all measures to ``<prefix>_measures<n>.xlsx``.

        The workbook is written exactly once, even when no table has measures.

        Returns:
            The path of the workbook that was written.
        """
        m_path = self._next_output_path("measures")
        self._measures_frame().to_excel(m_path)
        return m_path

    def extract_relationships(self):
        """Write all relationships to ``<prefix>_relationships<n>.xlsx``.

        Returns:
            The path of the workbook that was written.
        """
        r_path = self._next_output_path("relationships")
        self._relationships_frame().to_excel(r_path)
        return r_path

    def extract_calculated_columns(self):
        """Write all calculated columns to ``<prefix>_columns<n>.xlsx``.

        Returns:
            The path of the workbook that was written.
        """
        c_path = self._next_output_path("columns")
        self._calculated_columns_frame().to_excel(c_path)
        return c_path
        


In [61]:
# Build a MetaData object for "Weave"; "Weave.txt" is the JSON schema file
# used if reading the "Weave" path itself fails.
a=MetaData("Weave","Weave.txt")

In [62]:
# Write the measures workbook for the "Weave" model.
a.extract_measures()

In [63]:
# Write the relationships workbook for the "Weave" model.
a.extract_relationships()

In [64]:
# Write the calculated-columns workbook for the "Weave" model.
a.extract_calculated_columns()

In [65]:
# Same sequence for a second model, "BFSI", with "BFSI.txt" as the fallback.
b=MetaData("BFSI","BFSI.txt")

In [66]:
b.extract_measures()

In [67]:
b.extract_relationships()

In [68]:
b.extract_calculated_columns()