In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (20, 10)

# RETRIEVING AND BASIC FORMATTING EXCEL FILES

## Retrieving the .xlsx files
Find the directory where the excel files are living. 

Then **'excel_files'** will report what lives inside the folder. 

Note: Any additional excel files you want analysed will need to be put into the same location as **'directory'.**

In [4]:
directory = r"\\dc2-file001\Practice\InfoTech\BIM\Revit\RevitStandardsMasterBVN\02_BVN_Library\Master Content\X_Data\Data - ARR\_CONSOLIDATED v3"
files = os.listdir(directory)
excel_files = []
for f in files:
    if ".xlsx" in f:
        excel_files.append(f)
excel_files

['20170706 12_20 Family Types Log.xlsx',
 '20170706 12_20 FamilyCategory Log.xlsx',
 '20170706 12_20 FamilySubCategory Log.xlsx',
 '20170706 12_20 Host Families Log.xlsx',
 '20170706 12_20 Materials Log.xlsx',
 '20170706 12_20 Nested Families Log.xlsx',
 '20170706 12_20 OmniClass Log.xlsx',
 '20170706 12_20 Parameters Log.xlsx',
 '20170706 12_20 ParameterValuesByTypes Log.xlsx',
 '20170706 12_20 Reference Planes Log.xlsx',
 '20170706 12_20 Units Log.xlsx']

## xlsx. key and values

Use <b>keys</b> to pair each key with each excel file in <b>excel_files</b>. This is so that you don't have to type the long file name everytime you need to call it in the rest of the code. 

<b>file_dict</b> will tell you which excel file is paired with each key.

In [5]:
keys = [
        "FamilyTypes",
        "FamilyCategory",
        "FamilySubCategory",
        "HostFamilies",
        "Materials",
        "Nested Families",
        "Omniclass",
        "Parameters",
        "ParameterValuesByTypes",
        "ReferencePlanes",
        "Units",
        
       ]

file_dict = {key: value for (key, value) in zip(keys, excel_files)}
file_dict

{'FamilyCategory': '20170706 12_20 FamilyCategory Log.xlsx',
 'FamilySubCategory': '20170706 12_20 FamilySubCategory Log.xlsx',
 'FamilyTypes': '20170706 12_20 Family Types Log.xlsx',
 'HostFamilies': '20170706 12_20 Host Families Log.xlsx',
 'Materials': '20170706 12_20 Materials Log.xlsx',
 'Nested Families': '20170706 12_20 Nested Families Log.xlsx',
 'Omniclass': '20170706 12_20 OmniClass Log.xlsx',
 'ParameterValuesByTypes': '20170706 12_20 ParameterValuesByTypes Log.xlsx',
 'Parameters': '20170706 12_20 Parameters Log.xlsx',
 'ReferencePlanes': '20170706 12_20 Reference Planes Log.xlsx',
 'Units': '20170706 12_20 Units Log.xlsx'}

## FUNCTIONS TO FORMAT EXCEL FILES

### Removing the common file path characters from original family file path name

The example file path below is very long. In order to cut the fat out of the long file path, the common characters from each file path is removed. 

\\dc2-file001\Practice\InfoTech\BIM\Revit\RevitStandardsMasterBVN\02_BVN_Library\Master Content\Windows\Double Hung\Double Hung_3 Panel.rfa

Therefore, the start of the file path <b>\\dc2-file001\Practice\InfoTech\BIM\Revit\RevitStandardsMasterBVN\02_BVN_Library\Master Content</b> is removed in every spreadsheet.

### Removing the rfa. name from FamilyFilePath.

In the excel files, the family name (.rfa) is part of the FamilyFilePath column.

e.g. \\dc2-file001\Practice\InfoTech\BIM\Revit\RevitStandardsMasterBVN\02_BVN_Library\Master Content\Windows\Special\<u>Window Lancet_basic.rfa</u>

### Determines whether the family is an .rfa or from a .txt catalogue file.

<b> remove_rfaName</b> removes the family name from the FamilyFilePath column. This allows us to find things such as the count of how many families there are within a particular master content folder. It splits the <b>full_filepath</b> path by each \ .

<b>joined_again</b> will join the file path back through the \ except for the last one. The new file path is appended on a FamilyFilePath column.

### Determines whether the family is an .rfa or from a .txt catalogue file.
<b>getFileType</b> removes the last three characters from the FamilyFilePath.

<b>stripFileType</b> removes the last four characters (including the '.') from the FamilyName.

In [6]:
commonPartofFilePath = len(r'\\dc2-file001\Practice\InfoTech\BIM\Revit\RevitStandardsMasterBVN\02_BVN_Library\Master Content')
print commonPartofFilePath
def stripCommonPath(row):
        return row.FamilyFilePath[commonPartofFilePath+1:]
    
def remove_rfaName(row):
    full_filepath = row.FamilyFilePath
    split = full_filepath.split("\\")[:-1]
    joined_again = ("\\".join(split))
    return  joined_again

def getFileType(row):
    return row.FamilyFilePath[-3:]

def stripFileType(row):
    return row.FamilyName[:-4]

95


# REVIT VERSION

**rv_path** and **rv** reads from the ParameterValuesByTypes excel file.

**NOTE:** The columns - "FamilyEditedLast", "ParameterName", "ParameterValue", "ParameterIsInstance", "ParamterIsFormula", "ParameterIsReporting", "ParameterStorageType" are dropped from the table below.

In [7]:
rv_path = os.path.join(directory, file_dict["ParameterValuesByTypes"])
rv = pd.read_excel(rv_path)

rv.drop(["FamilyEditedLast", "ParameterName", "ParameterValue", "ParameterIsInstance", "ParamterIsFormula", "ParameterIsReporting", "ParameterStorageType"],axis=1,inplace=True)

rv.head()

Unnamed: 0,TimeProcessed,FamilyFilePath,FamilyName,FamilyCategory,OmniClassNumber,FamilyTypeName,ParamterGUID
0,0.514109,\\dc2-file001\Practice\InfoTech\BIM\Revit\Revi...,Window_Square Opening_WIN.rfa,Windows,,0400 x 1200mm,
1,0.514109,\\dc2-file001\Practice\InfoTech\BIM\Revit\Revi...,Window_Square Opening_WIN.rfa,Windows,,0400 x 1200mm,
2,0.514109,\\dc2-file001\Practice\InfoTech\BIM\Revit\Revi...,Window_Square Opening_WIN.rfa,Windows,,0400 x 1200mm,
3,0.514109,\\dc2-file001\Practice\InfoTech\BIM\Revit\Revi...,Window_Square Opening_WIN.rfa,Windows,,0400 x 1200mm,
4,0.514109,\\dc2-file001\Practice\InfoTech\BIM\Revit\Revi...,Window_Square Opening_WIN.rfa,Windows,,0400 x 1200mm,


## Number of Families in a Revit Version

In [8]:
nameAndVersion = rv[["RevitVersion", "FamilyName"]]
nameAndVersion.drop_duplicates(inplace=True)

rv_group = nameAndVersion.groupby("RevitVersion").agg("count")

KeyError: "['RevitVersion'] not in index"

In [None]:
rv_group.head(9)

In [None]:
rv_group.plot(kind="bar", grid=True);
plt.title("Number of families in each Revit Version", fontsize=20)
plt.xlabel("Revit Version", fontsize=15)
plt.ylabel("Number of families", fontsize=15)
plt.show()

## Filter for names of families in a revit version

Change the Revit Version value in <b>rv_filter</b> to filter the list of family names for a particular Revit version

In [None]:
rv_filter = rv[rv.RevitVersion==2011]

rv_filter.drop("FamilyTypeName",axis=1,inplace=True)

rv_filter.drop_duplicates(inplace=True)

rv_filter.shape
rv_filter.head(100)

## Number of Categories in a particular Revit Version = 2014

In [None]:
catAndVersion = rv_filter[["RevitVersion", "FamilyCategory"]]

catAndVersion_group = catAndVersion.groupby("FamilyCategory").agg("count")

catAndVersion_group.sort_values(by="RevitVersion",ascending=False, inplace=True)

catAndVersion_group.head()

In [None]:
catAndVersion_group.plot(kind="bar", grid=True);
plt.title("Number of families in particular category of a Revit Version", fontsize=20)
plt.xlabel("Family Categories", fontsize=15)
plt.ylabel("Number of families", fontsize=15)
plt.show()