<a href="https://colab.research.google.com/github/brendanlooker/colab-examples/blob/main/looker/lookml_code_consolidator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies

!pip install looker_sdk
!pip install lkml     # https://lkml.readthedocs.io/en/latest/
!pip install PyGithub

In [None]:


import glob
import looker_sdk
from looker_sdk import models40
import os
from git import Repo
import re
import lkml
from collections.abc import Hashable
from getpass import getpass

In [None]:
# Configure the Looker connection through environment variables.
# The client ID is entered in plain text; the client secret is read with
# getpass so it is not echoed into the notebook output.

os.environ.update(
    LOOKERSDK_BASE_URL="https://0a008a8e-ad98-4f2e-95c4-99bcdc1ff974.looker.app",  # Update to reference a valid Looker instance
    LOOKERSDK_CLIENT_ID=input("Enter Looker Client ID: "),                         # API client ID
    LOOKERSDK_CLIENT_SECRET=getpass("Enter Looker Client Secret: "),               # API client secret
)

In [None]:
# Test authentication to Looker.
# Creates the API 4.0 SDK client (presumably configured from the LOOKERSDK_*
# environment variables set earlier — confirm against the SDK docs), then
# requests only the email of the current API user and prints it as a sanity check.

sdk = looker_sdk.init40()

me = sdk.me(fields="email")
print(me.email)

In [None]:
# Get a list of all LookML view files in the Looker project.
# Specify the Looker Project Id; only view files (*.view.lkml) are considered.

project_id = "lg_demo_env" #@param {type:"string"}

all_files = sdk.all_project_files(project_id=project_id, fields="id")

# Each entry is (full project path, base file name).
# Match on the complete ".view.lkml" suffix so that names merely ending in
# "view.lkml" (e.g. "overview.lkml") are not picked up; this is the same
# convention the local "*.view.lkml" glob uses further down.
# Entries without an id are skipped instead of raising AttributeError.
file_list = [
    (file.id, os.path.basename(file.id))
    for file in all_files
    if file.id and file.id.endswith(".view.lkml")
]
print(file_list)

In [None]:
# Print the full project path of every collected view file, one per line
for file_id, _file_name in file_list:
    print(file_id)

In [None]:
# Get the git remote URL associated with the Looker project via the Looker SDK
# and derive the GitHub owner and repository name from it.

# NOTE: despite its name, this variable holds the project object returned by the
# SDK; the URL itself is its `git_remote_url` attribute.
git_remote_url = sdk.project(
    project_id=project_id)

print(git_remote_url.git_remote_url)

# Expected SSH form: git@github.com:<owner>/<repo>.git
# The dot before "git" is escaped and the pattern is anchored at the end, so a
# repository such as "my.github.io" is not cut off at the first ".git"-like
# substring. The ".git" suffix itself is optional.
pattern = r"git@github\.com:([^/]+)/(.+?)(?:\.git)?$"
match = re.match(pattern, git_remote_url.git_remote_url or "")

if match:
    repo_owner, repo_name = match.groups()
    print(f"Repo Owner: {repo_owner}")
    print(f"Repo Name: {repo_name}")
else:
    # Fail here with a clear message; otherwise the clone cell would later fail
    # with a NameError on repo_owner / repo_name.
    raise ValueError(
        f"Could not derive GitHub owner/repository from remote URL: "
        f"{git_remote_url.git_remote_url!r} (expected git@github.com:<owner>/<repo>.git)"
    )


In [None]:
# Clone Repo
# You can simply run a command like the one below to clone the repo locally (you will need to authenticate)
# Once cloned you can begin to run the code to compare LookML objects across files

# !git clone https://gitlab.com/yourusername/yourrepository.git

# Alternative: clone over HTTPS, authenticating with a GitHub personal access token
github_token = ""  # @param {type:"string"}
if not github_token:
    # Prompt rather than requiring the token to be saved in the notebook itself
    github_token = getpass("Enter GitHub token (leave blank for a public repository): ")

# Only embed credentials when a token was provided; a blank token would
# otherwise yield the malformed URL "https://@github.com/...".
credentials = f"{github_token}@" if github_token else ""
repo_url = f"https://{credentials}github.com/{repo_owner}/{repo_name}.git"
local_dir = f"./{project_id}"

# Clone repo if not already cloned
if not os.path.exists(local_dir):
    Repo.clone_from(repo_url, local_dir)
    print("Repository cloned locally.")
else:
    print("Repository already exists locally.")

In [None]:
# Helper class that encapsulates all functionality related to
# parsing LookML files, extracting relevant fields, comparing them,
# and writing the output to new files.

class LookMLHandler:
    """Namespace of static helpers for consolidating LookML view files.

    Covers parsing a file with lkml, extracting its fields, freezing field
    attributes for set comparison, writing the shared fields to a new file
    and removing them from the original files.
    """

    # Keys of a parsed view whose entries are treated as comparable fields.
    _FIELD_KEYS = ("dimensions", "dimension_groups", "measures")

    @staticmethod
    def parse_lookml_file(file_path):
        """Read the LookML file at `file_path` and return lkml's parsed dictionary."""
        print(f"Parsing LookML file: {file_path}")
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()
        return lkml.load(content)

    @staticmethod
    def extract_fields(lookml_data):
        """Collect dimensions, dimension groups and measures from parsed LookML.

        Args:
            lookml_data: dictionary as returned by `parse_lookml_file`.

        Returns:
            A tuple `(dimensions, dimension_groups, measures)`. Each element maps
            a field name to a dict of all its attributes except "name". Fields
            of every view in the file go into the same mapping, so a name that
            occurs in several views keeps the attributes of the last one.
        """
        extracted = {key: {} for key in LookMLHandler._FIELD_KEYS}
        for view in lookml_data.get("views", []):
            for key, fields_by_name in extracted.items():
                for field in view.get(key, []):
                    # Store all attributes except the name for comparison
                    fields_by_name[field.get("name")] = {
                        k: v for k, v in field.items() if k != "name"
                    }

        dimensions = extracted["dimensions"]
        dimension_groups = extracted["dimension_groups"]
        measures = extracted["measures"]

        print(f"Extracted dimensions: {dimensions.keys()}")
        print(f"Extracted dimension_groups: {dimension_groups.keys()}")
        print(f"Extracted measures: {measures.keys()}")
        return dimensions, dimension_groups, measures

    @staticmethod
    def make_hashable(attrs):
        """Return a hashable, order-insensitive representation of `attrs`.

        Dicts become frozensets of (key, frozen value) pairs, lists become
        tuples, and any other unhashable value is replaced by its `str()`.
        The result is meant for equality/set comparison only; it is not a
        lossless encoding of the input.
        """
        def freeze(x):
            if isinstance(x, dict):
                return frozenset((k, freeze(v)) for k, v in x.items())
            if isinstance(x, list):
                return tuple(freeze(i) for i in x)
            return x if isinstance(x, Hashable) else str(x)
        return freeze(attrs)

    @staticmethod
    def write_identical_fields(identical_dims, identical_dim_groups, identical_measures, output_file):
        """Write the shared fields to `output_file` as a view named "identical_fields".

        Each of the three mappings is `{field name: attribute dict}`.
        """
        print(f"Writing identical fields to {output_file}")

        def as_field_list(fields):
            # Re-attach the name that extract_fields split off
            return [{"name": name, **attributes} for name, attributes in fields.items()]

        lookml_structure = {
            "views": [
                {
                    "name": "identical_fields",
                    "dimensions": as_field_list(identical_dims),
                    "dimension_groups": as_field_list(identical_dim_groups),
                    "measures": as_field_list(identical_measures),
                }
            ]
        }
        with open(output_file, "w", encoding="utf-8") as file:
            file.write(lkml.dump(lookml_structure))
        print(f"Identical fields written successfully to {output_file}")

    @staticmethod
    def _drop_named(view, key, names):
        """Remove from `view[key]` every field whose name is in `names`.

        Returns True only if at least one field was removed. A view that has no
        `key` entry is left untouched, and `key` is deleted from the view when
        its last field is removed, so no empty field lists are left behind.
        """
        fields = view.get(key)
        if not fields:
            return False
        kept = [field for field in fields if field.get("name") not in names]
        if len(kept) == len(fields):
            return False
        if kept:
            view[key] = kept
        else:
            del view[key]
        return True

    @staticmethod
    def remove_identical_from_file(file_path, identical_dims, identical_dim_groups, identical_measures):
        """Remove the shared fields from `file_path` to avoid duplication.

        The file is rewritten (with a header comment noting the consolidation)
        only when at least one field was actually removed; otherwise it is left
        byte-for-byte unchanged.

        NOTE(review): the rewritten file is regenerated by lkml.dump from the
        parsed dictionary, so original comments/formatting may not survive —
        confirm against the lkml documentation.
        """
        print(f"Removing identical fields from {file_path}")
        with open(file_path, "r", encoding="utf-8") as file:
            lookml_data = lkml.load(file)

        names_by_key = (
            ("dimensions", identical_dims),
            ("dimension_groups", identical_dim_groups),
            ("measures", identical_measures),
        )

        modified = False
        for view in lookml_data.get("views", []):
            for key, names in names_by_key:
                # Call the helper unconditionally; only then fold into the flag
                if LookMLHandler._drop_named(view, key, names):
                    modified = True

        if modified:
            # Add the comment at the beginning of the file
            comment = "# Code modified as part of code consolidation effort, common objects have been moved to identical_fields.lkml\n\n"

            # Convert the LookML data back to a string
            updated_content = comment + lkml.dump(lookml_data)

            with open(file_path, "w", encoding="utf-8") as file:
                file.write(updated_content)
            print(f"Removed identical fields from {file_path}")
        else:
            print(f"No identical fields found in {file_path} to remove")



In [None]:
# Main function


def compare_lookml_files(file_paths, output_file):
    """Move fields that are identical in every file into one shared LookML file.

    Parses each file in `file_paths`, determines the dimensions, dimension
    groups and measures whose name and attributes are identical across all
    files, writes them to `output_file` and removes them from the originals.

    Args:
        file_paths: paths of the LookML view files to compare (at least two).
        output_file: path of the LookML file that receives the shared fields.

    Returns:
        None. Progress and results are reported via print.
    """
    handler = LookMLHandler()
    file_paths = list(file_paths)

    # With fewer than two files every field would trivially count as "common"
    # and a lone file would have all of its fields moved out.
    if len(file_paths) < 2:
        print("At least two LookML files are required for a comparison; nothing to do.")
        return

    reference_fields = None  # original (unfrozen) field dicts of the first file
    common = None            # (dims, dim_groups, measures) sets shared so far

    # Go through each file to extract and hash its fields for set comparison
    for file_path in file_paths:
        lookml_data = handler.parse_lookml_file(file_path)
        fields = handler.extract_fields(lookml_data)  # (dims, dim_groups, measures)

        # Convert each field mapping to a set of hashable (name, attributes) pairs
        frozen = tuple(
            {(name, handler.make_hashable(attrs)) for name, attrs in group.items()}
            for group in fields
        )

        if common is None:
            # First file: initialize the comparison sets
            reference_fields = fields
            common = frozen
        else:
            # Intersect to retain only fields common across all files
            common = tuple(shared & current for shared, current in zip(common, frozen))

    # Rebuild the result from the first file's ORIGINAL attribute dicts. The
    # frozen form is lossy (nested lists become tuples, nested dicts become
    # frozensets, key order is lost), so it is used for membership tests only.
    # Iterating the reference mapping also keeps the source field order.
    identical_dims, identical_dim_groups, identical_measures = (
        {
            name: attrs
            for name, attrs in reference.items()
            if (name, handler.make_hashable(attrs)) in shared
        }
        for reference, shared in zip(reference_fields, common)
    )

    # If common fields exist, write and clean them from the original files
    if identical_dims or identical_dim_groups or identical_measures:
        print(f"‚úÖ Identical dimensions found: {list(identical_dims.keys())}")
        print(f"‚úÖ Identical dimension groups found: {list(identical_dim_groups.keys())}")
        print(f"‚úÖ Identical measures found: {list(identical_measures.keys())}")
        handler.write_identical_fields(identical_dims, identical_dim_groups, identical_measures, output_file)
        for file_path in file_paths:
            handler.remove_identical_from_file(file_path, identical_dims, identical_dim_groups, identical_measures)
        print("‚úÖ Processing complete. Identical fields removed and saved.")
    else:
        print("No identical fields found across all files.")





In [None]:

# Base dir where the LookML code is located: <current working dir>/<project_id>/
# This is derived from the working directory rather than hard-coding "/content/"
# because the repo is cloned to "./<project_id>"; in Colab it still resolves to
# /content/<project_id>/. The trailing separator is kept on purpose.
local_base_dir = os.path.join(os.path.abspath(project_id), "")

# Folder from where to perform the search
# For example, if your LookML project contains a root folder named "Views"
# You can search all objects recursively by setting the folder variable to "Views"
# You can also restrict the search by specifying a specific path (e.g. "Views/cc")
folder = "Views" #@param {type:"string"}

# Set the folder path based on the folder value specified
# (equals local_base_dir when no folder is given)
folder_path = f"{local_base_dir}{folder}"


# Specify the filename of interest, leave blank if all files should be in scope
filename = "device.view.lkml" #@param {type:"string"}

# Report which of the four folder/filename combinations is being searched
if folder and filename:
    print(f"Searching for {filename} in folder: {folder_path}")
elif folder:
    print(f"Searching for all files in folder: {folder_path}")
elif filename:
    print(f"Searching for {filename} across all folders.")
else:
    print("Searching for all .view.lkml files across all folders.")

# Always search inside the cloned project (never the bare working directory),
# recursively, for either the given file name or every *.view.lkml file.
file_paths = glob.glob(
    os.path.join(folder_path, "**", filename or "*.view.lkml"),
    recursive=True,
)

# Absolute paths, sorted so that the processing order is reproducible
file_paths = sorted(os.path.abspath(file) for file in file_paths)
print(file_paths)


In [None]:
# Destination file for the dimensions, dimension groups and measures that
# turn out to be identical across the compared files
output_file = os.path.join(local_base_dir, "identical_fields.lkml")
print(output_file)

In [None]:
# Run the comparison / consolidation over the selected files

compare_lookml_files(file_paths=file_paths, output_file=output_file)

In [None]:
from git import Repo

def commit_and_push_changes(local_base_dir, commit_message, branch):
    """Stage and commit all changes, then push them to the remote repository.

    Args:
        local_base_dir: path of the local Git working tree.
        commit_message: message used for the commit.
        branch: branch to commit to and push; created from the current HEAD
            when it does not exist locally.

    Returns:
        None. Commit and push errors are caught and reported via print.
    """

    # Open the local Git repository
    repo = Repo(local_base_dir)

    # Ensure the correct branch is checked out
    if branch not in repo.branches:
        print(f"‚ö†Ô∏è Branch '{branch}' does not exist locally. Creating and checking it out.")
        # Create and checkout the branch based on the current HEAD
        repo.git.checkout('HEAD', b=branch)
    else:
        print(f"‚úÖ Branch '{branch}' exists. Checking it out.")
        # Checkout the branch if it exists
        repo.git.checkout(branch)

    # Check for changes to commit. Untracked files are included explicitly:
    # is_dirty() ignores them by default, so a newly created file on its own
    # would otherwise be reported as a clean repository.
    if not repo.is_dirty(untracked_files=True):
        print("No changes detected. Repository is clean.")
        return

    print("üö® Repository has uncommitted changes.")

    # Stage all the changes (new, modified and deleted files)
    repo.git.add(A=True)
    print("Staged changes.")

    # Commit the changes
    try:
        repo.index.commit(commit_message)
        print(f"‚úÖ Changes committed: {commit_message}")
    except Exception as e:
        print(f"‚ùå Error committing changes: {e}")
        return

    # Push the changes to the remote repository
    try:
        origin = repo.remote(name='origin')
        push_results = origin.push(branch)

        # push() does not raise for rejected refs: they carry the ERROR flag,
        # and a completely failed push yields an empty result list.
        if not push_results:
            raise RuntimeError("push returned no result")
        failures = [
            str(info.summary).strip()
            for info in push_results
            if info.flags & info.ERROR
        ]
        if failures:
            raise RuntimeError("; ".join(failures))

        print(f"‚úÖ Changes pushed to branch: {branch}")
    except Exception as e:
        print(f"‚ùå Error pushing changes: {e}")

# Example usage: commit the consolidated LookML files and push them to the
# given development branch
commit_message = "Updated LookML files with consolidated objects"
branch = "dev-brendan-buckley-pnpg"  #@param {type:"string"}

commit_and_push_changes(local_base_dir, commit_message=commit_message, branch=branch)
