# **SVD Comparisons (new C++20 library vs. numpy.linalg.svd)**

## Team H
* Evan Ram
* Prateek Makhija
* James Douthit
* Garrett Hempy

In [51]:
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pillow

import numpy as np
import os
import subprocess
from PIL import Image

class FengwangSVD:
    """
    Driver for the Dockerized C++ SVD test program (fengwang/matrix library).

    Typical use: call `FengwangSVD.build()` once, then construct an instance
    with the input matrix and call `run()`. Afterwards `stats` holds the
    key/value pairs the program printed, and `U`, `S`, `V`, `A_prime` hold
    the matrices read back from the .bmp files found in the shared
    `program_io` directory.
    """

    def __init__(self, A):
        """
        `A` should be a 2d numpy array
        """

        self._A = A

    @staticmethod
    def build():
        """
        Builds the Fengwang Matrix library SVD tester using Docker.

        New C++20 features are not available on many machines,
        so please have Docker installed.

        The build output is echoed line by line as it arrives.

        Raises `subprocess.CalledProcessError` if `docker build` exits with
        a non-zero status.
        """

        # Plain subprocess call instead of the IPython-only `!` shell escape,
        # so the class also works outside of a notebook.
        cmd = ['docker', 'build', '-t', 'fengwang-matrix-svd', '.']
        with subprocess.Popen(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              universal_newlines=True) as proc:
            for line in proc.stdout:
                print(line, end='')

        # Leaving the `with` block waits for the process, so returncode is set
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)

    def run(self):
        """
        Runs the C++ test program for the fengwang/matrix library.
        Returns its command line output as a list of lines from stdout.

        Side effects: writes `input.npy`, then populates `self.stats` and
        `self.{U, S, V, A_prime}` from the program's output.

        Not using bang command b/c we want to process the output to get timings.
        Timing from the start of this method to the end of it is pointless since
        it includes overhead of process creation.

        Raises `subprocess.CalledProcessError` if `docker run` fails.
        """

        self._write_matrix()

        # Build the argument list directly (no str.split) so that a working
        # directory containing whitespace stays a single `-v` argument.
        cmd = [
            'docker', 'run',
            '-v', f'{self._program_io_path()}:/program_io',
            'fengwang-matrix-svd',
        ]
        proc_out = subprocess.check_output(cmd)
        lines = proc_out.decode('utf-8').splitlines()
        self._read_stats(lines)
        self._read_matrices()
        return lines

    @property
    def stats(self):
        """
        A dict containing the stat results from running the program.

        Raises `RuntimeError` if `run()` has not been called yet.
        """

        if not hasattr(self, '_stats'):
            raise RuntimeError('Please call run() first')

        return self._stats

    def _program_io_path(self):
        """
        Return the host directory that is mounted into the container as
        `/program_io`, creating it if it does not exist yet.
        """

        path = os.path.join(os.getcwd(), 'fengwang-matrix/program_io')
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)
        return path

    def _write_matrix(self):
        """
        Write matrix A as input data to the program.
        """

        path = os.path.join(self._program_io_path(), 'input.npy')
        with open(path, 'wb') as f:
            np.save(f, self._A)

    def _read_stats(self, stdout_lines):
        """
        Parse stdout for key/value pairs and assign it to self._stats

        Everything before the `!!BEGIN-STATS!!` marker line is ignored.
        After the marker, each `key := value` line becomes one dict entry:
        keys are lower-cased and stripped, values are stripped and converted
        to `int` when they are a (optionally signed) run of decimal digits.
        Blank lines and lines without `:=` are skipped.
        """

        stats = {}
        start_processing = False

        for line in stdout_lines:
            # Most lines end with carriage return '\r' for some reason
            line = line.strip()

            if line == '!!BEGIN-STATS!!':
                # Marker line: stats output starts on the next line
                start_processing = True
                continue
            if not start_processing:
                # Program spits out diagnostic data on `load_npy`, can't seem to disable it...
                continue

            k, sep, v = line.partition(':=')
            if not sep:
                # Blank line, or stray output that is not a key/value pair
                continue

            k = k.lower().strip()
            v = v.strip()

            # Output numbers should all be integers, to avoid differences in
            # floating point arithmetic between Python and C++ tests.
            # isdecimal() (unlike isdigit()) only accepts characters that
            # int() can actually parse; a leading sign is allowed as well.
            digits = v[1:] if v[:1] in ('+', '-') else v
            if digits.isdecimal():
                v = int(v)

            stats[k] = v

        self._stats = stats

    def _read_matrices(self):
        """
        Read in the matrices from the generated .bmp files the program created.
        Will populate self.{U, S, V, A_prime}

        Each image is converted to 8-bit grayscale and divided by 255.
        """

        matrices = ['U', 'S', 'V', 'A_prime']

        for m in matrices:
            path = os.path.join(self._program_io_path(), m + '.bmp')
            # Context manager closes the underlying file once pixels are copied
            with Image.open(path) as img:
                # Not sure if we need to divide by 255
                setattr(self, m, np.array(img.convert('L')) / 255)

# Build executable before we can run it
# (build() runs `docker build` to produce the `fengwang-matrix-svd` image that run() starts)
FengwangSVD.build()

# Random 100x100 input matrix; run() saves it as input.npy for the C++ program
A = np.random.rand(100, 100)
fw_svd = FengwangSVD(A)
fw_svd.run()

# Key/value stats parsed from the program's stdout
print('Stats:', fw_svd.stats)
# Residuals: A minus the product U @ S @ V.T, then A minus A_prime (read from A_prime.bmp).
# NOTE(review): U, S, V and A_prime are loaded from grayscale .bmp files and divided by 255
# (see _read_matrices), so presumably they are not the raw factor values -- confirm the
# image encoding before trusting these residuals.
print(A - fw_svd.U@fw_svd.S@fw_svd.V.T) # I am probably doing it wrong??
print(A - fw_svd.A_prime)

Sending build context to Docker daemon  114.5MB
Step 1/5 : FROM gcc:latest
 ---> 2f9778ee181e
Step 2/5 : COPY ./fengwang-matrix /app
 ---> f173b0d6d5c0
Step 3/5 : WORKDIR /app
 ---> Running in 0c18307a29fb
Removing intermediate container 0c18307a29fb
 ---> c31f2c9df6b9
Step 4/5 : RUN make
 ---> Running in fe25160c0323
g++ -std=c++2a -Wall -Wextra -O2 -pthread -o svdimage main.cpp -lstdc++fs
Removing intermediate container fe25160c0323
 ---> e6ce6e6b66c5
Step 5/5 : CMD ["./svdimage"]
 ---> Running in e4791350b825
Removing intermediate container e4791350b825
 ---> 52aa90d9a90f
Successfully built 52aa90d9a90f
Successfully tagged fengwang-matrix-svd:latest
Stats: {'matrix-rows': 100, 'matrix-cols': 100, 'some-num': 1234567890}
[[-0.48290919 -1.09216649 -0.87856108 ... -0.87095071 -0.77971444
  -0.72041137]
 [-1.14448031 -0.26716412 -0.69826457 ... -0.72760621 -1.20536446
  -1.063098  ]
 [-0.32432421 -0.37200043 -0.957478   ... -1.27204529 -0.48498538
  -0.35285592]
 ...
 [-0.42126718 -0.77

The following prompts may be useful, but you don't have to use them.

## Introduction

Describe the objective of your study, citing prior work as appropriate (papers, websites, etc.).  There is no requirement on citation style, but please try to be consistent.

## Methods

## Results and interpretation

## Conclusions and open questions