In [None]:
from os import sys
# Path to workspace
sys.path.insert(0, '/workspace/3d-shapes-embeddings/contrib/sharp_features/')
sys.path.insert(0, '/workspace/dense-self-supervised-representation-learning-for-3D-shapes/')

import h5py
import torch
import numpy as np
from tqdm import tqdm
import k3d

In [None]:
# Single-GPU setup: `device1`/`device2` are read by MultiModalModel for the two
# backbones, `device` by the collate functions that place the batches.
device1 = device2 = device = 'cuda:3'

In [None]:
from torch.utils.data import Dataset, default_collate, DataLoader
from enum import Enum

class Modality(Enum):
    """Data modalities a dataset can be asked to serve."""
    # Only MESH and POINT_CLOUD are handled by CrossmodalDataset.__getitem__.
    MESH = 'mesh'
    POINT_CLOUD = 'point_cloud'
    DEPTH_IMG = 'depth_images'
    SDF = 'sdf'


class CrossmodalDataset(Dataset):
    """Single-modality view over an HDF5 file of shapes.

    Depending on ``modality`` an item is either a mesh
    (``centers, corners, normals, neighbors``) or a point cloud
    (``points, face_index``).

    :param data_path: path to the HDF5 file; it is opened read-only in the
        constructor and kept open on the instance
    :param modality: a ``Modality`` member; only ``MESH`` and ``POINT_CLOUD``
        are implemented
    :param transform: optional callable applied to the raw numpy array
        (face features for meshes, points for point clouds) before it is
        converted to a tensor
    """

    def __init__(self, data_path, modality, transform=None):
        super().__init__()
        self.modality = modality
        self.transform = transform
        self.file = h5py.File(data_path, 'r')

    def __getitem__(self, index):
        """Return the sample at ``index`` for the configured modality.

        :raises NotImplementedError: for modalities other than mesh / point cloud
            (previously this silently returned ``None``)
        """
        if self.modality is Modality.MESH:
            return self._get_mesh(index)
        if self.modality is Modality.POINT_CLOUD:
            return self._get_point_cloud(index)
        raise NotImplementedError(
            f'CrossmodalDataset does not support modality {self.modality!r}'
        )

    def _get_mesh(self, index):
        """Load one mesh as channel-first tensors plus its face adjacency."""
        # Each face is stored as 15 values: 3 center + 9 corner + 3 normal coordinates.
        features = self.file['features'][index][:].reshape(-1, 15)
        neighbors = self.file['neighbors'][index][:].reshape(-1, 3)

        if self.transform is not None:
            features = self.transform(features)

        # (n_faces, 15) -> (15, n_faces)
        features = torch.from_numpy(features).float().permute(1, 0)
        neighbors = torch.from_numpy(neighbors).long()

        centers, corners, normals = features[:3], features[3:12], features[12:]
        # Express the three corners relative to the face center. Stay in torch:
        # the previous np.concatenate produced a NumPy array that was then
        # subtracted from a tensor.
        corners = corners - centers.repeat(3, 1)

        return centers, corners, normals, neighbors

    def _get_point_cloud(self, index):
        """Load one point cloud as a (channels, n_points) tensor and its face ids."""
        points = self.file['points'][index][:]
        face_index = self.file['face_index'][index][:]

        if self.transform is not None:
            points = self.transform(points)

        points = torch.from_numpy(points).float().permute(1, 0)
        return points, torch.from_numpy(face_index).long()

    def __len__(self):
        # The sample count is taken from the 'points' dataset for every modality.
        return self.file['points'].shape[0]
    
    
class DoubleDataset(CrossmodalDataset):
    """CrossmodalDataset that yields every sample twice.

    The parent ``__getitem__`` is invoked two separate times per index, so the
    configured ``transform`` is applied independently to each of the two copies.
    """

    def __init__(self, **multimodal_dataset_kwargs):
        # All keyword arguments are forwarded untouched to CrossmodalDataset.
        super().__init__(**multimodal_dataset_kwargs)

    def __getitem__(self, idx):
        first_view = super().__getitem__(idx)
        second_view = super().__getitem__(idx)
        return first_view, second_view

    def __len__(self):
        return super().__len__()

    
class DoubleModalityDataset(Dataset):
    """Zip two index-aligned datasets into one.

    An item is the flat concatenation of the tuple returned by ``dset1`` and the
    tuple returned by ``dset2`` for the same index.

    :param dset1: first dataset; also defines the length
    :param dset2: second dataset, indexed with the same ``idx``
    """

    def __init__(self, dset1, dset2):
        super().__init__()
        self.dset1 = dset1
        self.dset2 = dset2

    def __getitem__(self, idx):
        from_first = self.dset1[idx]
        from_second = self.dset2[idx]
        return (*from_first, *from_second)

    def __len__(self):
        return len(self.dset1)
        
        
        
def sample(x, num_points=1024):
    """Farthest point sampling over a batch of point sets.

    Starting from one random point per batch element, repeatedly picks the point
    whose squared distance to the already selected points is largest.

    :param x: tensor of shape batch_size x channels x n_points
    :param num_points: how many indices to select per batch element
    :return: long tensor of selected point indices, batch_size x num_points
    """
    batch_size, channels, n_points = x.shape
    dev = x.device

    selected = torch.zeros(batch_size, num_points, dtype=torch.long, device=dev)
    # Squared distance from every point to its closest selected point so far.
    nearest_sq_dist = torch.full((batch_size, n_points), 1e10, device=dev)
    current = torch.randint(0, n_points, (batch_size,), dtype=torch.long, device=dev)
    rows = torch.arange(batch_size, dtype=torch.long, device=dev)

    for step in range(num_points):
        selected[:, step] = current
        anchor = x[rows, :, current].view(batch_size, channels, 1)
        sq_dist = ((x - anchor) ** 2).sum(dim=1)
        closer = sq_dist < nearest_sq_dist
        nearest_sq_dist[closer] = sq_dist[closer]
        current = nearest_sq_dist.max(dim=-1).indices

    return selected


def move_to_device(data, device='cpu'):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, to ``device``.

    :param data: an object with a ``.to(device)`` method, or a list/tuple of such
        objects (containers may be nested)
    :param device: target device passed to ``.to``
    :return: the moved object; a list for list input, a tuple for tuple input
    """
    if isinstance(data, (list, tuple)):
        moved = [move_to_device(item, device) for item in data]
        # Lists stay lists (original behaviour); tuples are now supported as well.
        return moved if isinstance(data, list) else tuple(moved)
    return data.to(device)
        
        
def collate_clouds(data, num_points=1024, device='cpu'):
    """Collate (points, face_index) samples and subsample every cloud.

    The samples are stacked with ``default_collate``, moved to ``device`` and
    reduced to ``num_points`` points each using the indices chosen by ``sample``;
    the face indexes are gathered with the same indices so they stay aligned.

    :param data: list of (points, face_index) pairs
    :param num_points: number of points kept per cloud
    :param device: device the batch is moved to before sampling
    :return: (points batch, face index batch)
    """
    points, face_ids = move_to_device(default_collate(data), device)

    keep = sample(points, num_points)
    # Broadcast the kept indices over the channel dimension of the points.
    gather_idx = keep.unsqueeze(1).expand(-1, points.size(1), -1)

    points = points.gather(2, gather_idx)
    face_ids = face_ids.gather(1, keep)

    return points, face_ids

        
def collate_meshes(data, device='cpu'):
    """Stack meshes with different face counts into one batch.

    Meshes with fewer faces than the largest one in the batch are padded by
    appending randomly chosen (with replacement) copies of their own faces.

    :param data: list of (centers, corners, normals, neighbors) tuples; the face
        count of a sample is ``neighbors.shape[0]``
    :param device: device the stacked tensors are moved to
    :return: (centers, corners, normals, neighbors) batch tensors
    """
    target_faces = max((nbrs.shape[0] for _, _, _, nbrs in data), default=0)

    stacked = ([], [], [], [])  # centers, corners, normals, neighbors
    for ctr, crn, nrm, nbrs in data:
        n_faces = nbrs.shape[0]
        missing = target_faces - n_faces
        if missing > 0:
            # Indices of the faces that get duplicated to fill the gap.
            pad_idx = np.random.choice(n_faces, missing)
            ctr = torch.concat([ctr, ctr[:, pad_idx]], dim=1)
            crn = torch.concat([crn, crn[:, pad_idx]], dim=1)
            nrm = torch.concat([nrm, nrm[:, pad_idx]], dim=1)
            nbrs = torch.concat([nbrs, nbrs[pad_idx]])
        for bucket, part in zip(stacked, (ctr, crn, nrm, nbrs)):
            bucket.append(part)

    centers, corners, normals, neighbors = (
        torch.stack(bucket).to(device) for bucket in stacked
    )
    return centers, corners, normals, neighbors

In [None]:
index = 0
# Hand-built mini-batch: five consecutive samples starting at `index`, each a
# (transposed points, face_index) pair read straight from the HDF5 file.
with h5py.File('abc_train.hdf5') as h5r:
    pc_batch = [
        (h5r['points'][sample_idx][:].T, h5r['face_index'][sample_idx][:])
        for sample_idx in range(index, index + 5)
    ]

In [None]:
def groupby(features, face_indexes, max_faces):
    '''
    Spread element features over per-patch slots.

    :param features: features for all face/point/pixel/voxel, batch_size x feature_dim x n_features
    :param face_indexes: patch label of every element, batch_size x n_features
    :param max_faces: number of patch slots (labels 0 .. max_faces - 1)
    :return: grouped (batch_size x max_faces x feature_dim x n_features), holding an
             element's features in the slot of its patch and zeros elsewhere;
             mask (batch_size x max_faces x n_features), True where element belongs to patch
    '''
    dev = face_indexes.device
    patch_ids = torch.arange(max_faces, device=dev).view(-1, 1)
    mask = torch.unsqueeze(face_indexes, 1).eq(patch_ids)

    filler = torch.zeros(1, dtype=torch.float32, device=dev)
    grouped = torch.where(mask[:, :, None], features[:, None], filler)

    return grouped, mask


def get_patch_embeddings(features, labels, max_faces):
    """
    Mean-pool element features per patch.

    The per-patch sums are computed with one batched matrix product between the
    patch membership mask and the features, so no
    batch_size x max_faces x feature_dim x n_features intermediate is materialised.

    :param features: features for all face/point/pixel/voxel, batch_size x feature_dim x n_features
    :param labels: patches/segmentation labels, batch_size x n_features
    :param max_faces: number of patch slots (labels 0 .. max_faces - 1)
    :return: pooled (batch_size x feature_dim x max_faces, zeros for empty patches),
             counts (batch_size x max_faces, number of elements per patch)
    """
    patch_ids = torch.arange(max_faces, device=labels.device).view(1, -1, 1)
    # membership[b, p, n] is True when element n of sample b carries label p.
    membership = labels.unsqueeze(1) == patch_ids
    counts = membership.sum(dim=-1)

    # (B, max_faces, N) @ (B, N, F) -> per-patch feature sums (B, max_faces, F)
    sums = torch.bmm(membership.to(features.dtype), features.transpose(1, 2))
    # Empty patches have a zero sum; dividing by 1 keeps them at zero.
    pooled = sums / counts.clamp(min=1).unsqueeze(2)

    return pooled.transpose(1, 2), counts


def face_indexes_to_patch_counts(face_indexes, max_faces):
    """
    Build a 0/1 occupancy vector over face slots from sampled face ids.

    Slots ``0 .. max(face_indexes)`` of each sample are marked 1, the rest 0.
    Face ids are 0-based, so the slot equal to the largest id is included; the
    previous strict ``<`` comparison dropped it, and a sample whose points all
    lie on face 0 ended up with no occupied slot at all.

    :param face_indexes: face id of every element, batch_size x n_features
    :param max_faces: number of face slots in the output
    :return: long tensor, batch_size x max_faces
    """
    highest_face = face_indexes.max(dim=1).values
    slots = torch.arange(max_faces, device=face_indexes.device).unsqueeze(0)
    return (slots <= highest_face.unsqueeze(1)).long()

In [None]:
from torch.nn import functional as F

def patch_contrastive_loss(x1, x2, params):
    """
    Contrastive loss between the patches of two views, computed per sample.

    Every patch of either view is an anchor; its positive is the patch with the
    same index in the other view and all remaining patches of the same sample
    are negatives. Anchors whose patch count is zero are excluded from the loss.

    :param x1: tuple (embeddings batch_size x emb_dim x n_patches, patch_counts batch_size x n_patches)
    :param x2: same structure for the second view
    :param params: dict providing the temperature under key 'tau'
    :return: scalar loss tensor
    """
    emb_a, counts_a = x1
    emb_b, counts_b = x2
    dev = emb_a.device

    n_patches = emb_a.size(2)
    total = 2 * n_patches

    both = torch.cat((F.normalize(emb_a, dim=1), F.normalize(emb_b, dim=1)), dim=2)
    # b x (2 * n_patches) x (2 * n_patches) similarity matrix
    sims = torch.bmm(both.transpose(2, 1), both) / params['tau']

    # Drop the diagonal (self similarities); every row keeps total - 1 candidates,
    # which become the class dimension expected by cross_entropy.
    off_diagonal = ~torch.eye(total, dtype=torch.bool, device=dev)
    logits = sims.masked_select(off_diagonal).view(-1, total, total - 1)
    logits = logits.transpose(2, 1).contiguous()

    # With the diagonal removed, the positive of first-view patch i sits at
    # column i + n_patches - 1 and that of second-view patch i at column i.
    first_half = torch.arange(n_patches) + n_patches - 1
    second_half = torch.arange(n_patches)
    targets = torch.cat((first_half, second_half), dim=0).to(dev)

    # -100 is the label cross_entropy ignores by default.
    is_empty = torch.cat((counts_a == 0, counts_b == 0), dim=1)
    targets = targets.expand_as(is_empty).masked_fill(is_empty, -100)

    return F.cross_entropy(logits, targets)

In [None]:
def multicollate(data, *collators):
    """Collate each component of multi-part samples with its own collator.

    :param data: list of samples; component ``i`` of every sample is handed to
        ``collators[i]`` (components beyond ``len(collators)`` are ignored)
    :param collators: one callable per component, each taking a list
    :return: list with one collated result per collator, in order
    """
    columns = [[item[slot] for item in data] for slot in range(len(collators))]
    return [collator(column) for collator, column in zip(collators, columns)]

In [None]:
def collate(data):
    """Collate samples laid out as (mesh view 1, mesh view 2, cloud view 1, cloud view 2).

    The global ``device`` is looked up when the function is called, so batches
    land on whatever ``device`` is set to at that moment.
    """
    def _meshes(samples):
        return collate_meshes(samples, device=device)

    def _clouds(samples):
        return collate_clouds(samples, device=device)

    return multicollate(data, _meshes, _meshes, _clouds, _clouds)

In [None]:
from workspace.datasets.transforms import *

def _point_cloud_augmentations():
    """Build a fresh point-cloud transform pipeline (same settings for every split)."""
    return Compose(
        PointCloudNormalize(),
        RandomRotation(low=-45, high=45, axis='xyz'),
        RandomJitter(std=0.01, clip_bound=0.05)
    )


def _mesh_augmentations():
    """Build a fresh mesh transform pipeline (same settings for every split)."""
    return Compose(
        MeshNetRandomRotation(low=-45, high=45, axis='xyz'),
        MeshNetRandomJitter(std=0.01, clip_bound=0.05)
    )


# --- training split ---
pdataset_train = DoubleDataset(
    data_path='abc_train.hdf5',
    modality=Modality.POINT_CLOUD,
    transform=_point_cloud_augmentations(),
)
mdataset_train = DoubleDataset(
    data_path='abc_train.hdf5',
    modality=Modality.MESH,
    transform=_mesh_augmentations(),
)
# Mesh views come first, point-cloud views second; `collate` relies on this order.
train = DoubleModalityDataset(mdataset_train, pdataset_train)

# --- test split ---
pdataset_test = DoubleDataset(
    data_path='abc_test.hdf5',
    modality=Modality.POINT_CLOUD,
    transform=_point_cloud_augmentations(),
)
mdataset_test = DoubleDataset(
    data_path='abc_test.hdf5',
    modality=Modality.MESH,
    transform=_mesh_augmentations(),
)
test = DoubleModalityDataset(mdataset_test, pdataset_test)

# Both loaders iterate in dataset order (shuffle=False).
train_loader = DataLoader(train, batch_size=6, shuffle=False, collate_fn=collate)
test_loader = DataLoader(test, batch_size=6, shuffle=False, collate_fn=collate)

In [None]:
import torch.nn.functional as F

# def contrastive_loss(v1_emb, v2_emb, params):
#     device = v1_emb.device
#     v1_emb = F.normalize(v1_emb, dim=-1)
#     v2_emb = F.normalize(v2_emb, dim=-1)
    
#     v1_logits = v1_emb @ v2_emb.T / params['tau']
#     v2_logits = v2_emb @ v1_emb.T / params['tau']
    
#     labels = torch.arange(v1_emb.shape[0]).to(device)
    
#     loss1 = F.cross_entropy(v1_logits, labels)
#     loss2 = F.cross_entropy(v2_logits, labels)
    
#     return loss1 + loss2

def contrastive_loss(v1_embeddings, v2_embeddings, params):
    """
    Contrastive loss over a batch of paired embeddings.

    Each of the 2 * batch_size embeddings is an anchor whose positive is the
    embedding with the same batch index in the other view; all other embeddings
    are negatives.

    :param v1_embeddings: first view, batch_size x emb_dim
    :param v2_embeddings: second view, batch_size x emb_dim
    :param params: dict providing the temperature under key 'tau'
    :return: scalar loss tensor
    """
    dev = v1_embeddings.device
    batch_size = v1_embeddings.size(0)
    n_anchors = 2 * batch_size

    stacked = torch.cat(
        (F.normalize(v1_embeddings, dim=1), F.normalize(v2_embeddings, dim=1)),
        dim=0,
    )
    sims = stacked @ stacked.t() / params['tau']

    # Remove the diagonal so an anchor is never compared with itself.
    off_diagonal = ~torch.eye(n_anchors, dtype=torch.bool, device=dev)
    logits = sims[off_diagonal].view(n_anchors, n_anchors - 1).contiguous()

    # Column of the positive after the diagonal has been removed.
    first_half = torch.arange(batch_size) + batch_size - 1
    second_half = torch.arange(batch_size)
    targets = torch.cat((first_half, second_half), dim=0).to(dev)

    return F.cross_entropy(logits, targets)

In [None]:
class Transpose(torch.nn.Module):
    """Module wrapper around ``transpose`` so it can be placed in a Sequential.

    :param dims: the two dimensions to swap, forwarded to ``transpose``
    """

    def __init__(self, *dims):
        super().__init__()
        self.dims = dims

    def forward(self, data):
        swapped = torch.transpose(data, *self.dims)
        return swapped
    

class MultiModalModel(torch.nn.Module):
    """Two backbones, each followed by its own projection head.

    ``model1`` / ``head1`` are placed on the global ``device1`` and ``model2`` /
    ``head2`` on the global ``device2``. Both backbones must expose
    ``forward_features``.

    :param model1: backbone for the first modality
    :param model2: backbone for the second modality
    :param model_output_dim: channel dimension of the backbone features
    :param result_dim: channel dimension produced by the heads
    :param hidden_dim: hidden width of the heads
    """

    def __init__(self, model1, model2, model_output_dim, result_dim=64, hidden_dim=256):
        super().__init__()
        self.model1 = model1.to(device1)
        self.model2 = model2.to(device2)
        self.head1 = self._make_head(model_output_dim, hidden_dim, result_dim).to(device1)
        self.head2 = self._make_head(model_output_dim, hidden_dim, result_dim).to(device2)

    @staticmethod
    def _make_head(in_dim, hidden_dim, out_dim):
        """Per-element MLP head for channel-first (batch, channels, elements) features."""
        # Linear acts on the last dimension, hence the transposes around it.
        # The layer order fixes the state_dict keys and must not change.
        return torch.nn.Sequential(
            Transpose(1, 2),
            torch.nn.Linear(in_dim, hidden_dim),
            Transpose(1, 2),
            torch.nn.BatchNorm1d(hidden_dim),
            torch.nn.ReLU(),
            Transpose(1, 2),
            torch.nn.Linear(hidden_dim, out_dim),
            Transpose(1, 2),
        )

    @staticmethod
    def _strip_face_indexes(cloud_input):
        """Return the data part of a (data, face_indexes) pair; pass anything else through."""
        if isinstance(cloud_input, (tuple, list)):
            data, _ = cloud_input
            return data
        return cloud_input

    def forward(self, input1_1, input1_2, input2_1, input2_2):
        """Project both views of both modalities.

        :param input1_1: first view for ``model1``
        :param input1_2: second view for ``model1``
        :param input2_1: (data, face_indexes) pair, first view for ``model2``
        :param input2_2: (data, face_indexes) pair, second view for ``model2``
        :return: (head1 view 1, head1 view 2, head2 view 1, head2 view 2, face_indexes)
        """
        v1_1_emb = self.model1.forward_features(input1_1)
        v1_2_emb = self.model1.forward_features(input1_2)

        # NOTE: only the face indexes of the SECOND view are returned (kept for
        # backward compatibility of the 5-tuple). Callers that need the first
        # view's face indexes must read them from ``input2_1[1]``.
        cloud_1, _ = input2_1
        cloud_2, face_indexes = input2_2
        v2_1_emb = self.model2.forward_features(cloud_1)
        v2_2_emb = self.model2.forward_features(cloud_2)

        return (
            self.head1(v1_1_emb),
            self.head1(v1_2_emb),
            self.head2(v2_1_emb),
            self.head2(v2_2_emb),
            face_indexes
        )

    def get_embeddings(self, input1_1, input1_2, input2_1, input2_2):
        """Return the raw backbone features (no projection heads).

        Accepts the same inputs as ``forward``: ``input2_*`` may be
        (data, face_indexes) pairs, in which case the face indexes are dropped
        before the backbone is called, or the bare data as before.
        """
        v1_1_emb = self.model1.forward_features(input1_1)
        v1_2_emb = self.model1.forward_features(input1_2)
        v2_1_emb = self.model2.forward_features(self._strip_face_indexes(input2_1))
        v2_2_emb = self.model2.forward_features(self._strip_face_indexes(input2_2))

        return v1_1_emb, v1_2_emb, v2_1_emb, v2_2_emb

In [None]:
from workspace.models.meshnet import MeshNet
from workspace.models.dgcnn import DGCNN

# Backbones for the two modalities; MultiModalModel calls `forward_features` on both.
# NOTE(review): the meaning of n_patches=5 is defined in workspace.models — confirm there.
mnet = MeshNet(n_patches=5)
dgcnn = DGCNN(n_patches=5)

In [None]:
# model1 = mesh backbone, model2 = point-cloud backbone; 512 is the backbone feature width.
model = MultiModalModel(model1=mnet, model2=dgcnn, model_output_dim=512)

In [None]:
# NOTE(review): debugging leftover — in this notebook `v1_patch_counts` is only a local
# of patch_contrastive_loss, so this cell presumably fails on a fresh kernel.
v1_patch_counts.shape

In [None]:
# NOTE(review): debugging leftover — `empty_patches_mask` is only a local of
# patch_contrastive_loss, so this cell presumably fails on a fresh kernel.
empty_patches_mask.shape

In [None]:
# NOTE(review): debugging leftover — `labels` is only defined inside the loss
# functions, so this cell presumably fails on a fresh kernel.
labels.shape

In [None]:
# NOTE(review): debugging leftover — `data1_1` is a local of `forward` (defined in a
# later cell), so this cell presumably fails on a fresh kernel.
data1_1[0].shape

In [None]:
from copy import deepcopy

def forward(
    model,
    batch, # raw data from dataloader
    logger, # neptune run
    mode # 'train'/'val'
): # -> loss
    """Compute all contrastive losses for one batch.

    :param model: MultiModalModel returning per-element embeddings for
        (mesh view 1, mesh view 2, cloud view 1, cloud view 2) plus face indexes
    :param batch: (mesh view 1, mesh view 2, cloud view 1, cloud view 2) as
        produced by ``collate``; each cloud view is a (points, face_indexes) pair
    :param logger: neptune run (not used inside this function)
    :param mode: 'train'/'val' (not used inside this function)
    :return: dict with the total under 'loss' and the individual terms

    Reads the global ``params`` (key 'tau' via the loss functions) and stores a
    deep copy of the batch in the global ``last_batch`` for later inspection.
    """
    data1_1, data1_2, data2_1, data2_2 = batch

    # Mesh views are (centers, corners, normals, neighbors); the last dimension
    # of `centers` is the (padded) number of faces in the batch.
    max_faces = data1_1[0].shape[-1]
    global last_batch
    last_batch = deepcopy(batch)

    out1_1, out1_2, out2_1, out2_2, _ = model(data1_1, data1_2, data2_1, data2_2)

    # Each cloud view is subsampled independently, so every view has its OWN
    # point -> face assignment. The model only returns the second view's face
    # indexes; pooling the first view with them mixed up the patches.
    _, face_indexes_1 = data2_1
    _, face_indexes_2 = data2_2

    pooled2_1, counts2_1 = get_patch_embeddings(out2_1, face_indexes_1, max_faces)
    pooled2_2, counts2_2 = get_patch_embeddings(out2_2, face_indexes_2, max_faces)
    # Occupancy of the mesh face slots, estimated from the face ids hit by
    # either cloud view.
    face_counts = face_indexes_to_patch_counts(
        torch.cat((face_indexes_1, face_indexes_2), dim=1), max_faces
    )

    cloud_views = ((pooled2_1, counts2_1), (pooled2_2, counts2_2))
    mesh_views = ((out1_1, face_counts), (out1_2, face_counts))

    # local (patch level) losses inside each modality
    pc_local_loss = patch_contrastive_loss(cloud_views[0], cloud_views[1], params)
    mesh_local_loss = patch_contrastive_loss(mesh_views[0], mesh_views[1], params)

    # local crossmodal loss: every cloud view against every mesh view
    local_crossmodal_loss = sum(
        patch_contrastive_loss(cloud_view, mesh_view, params)
        for cloud_view in cloud_views
        for mesh_view in mesh_views
    )

    # Global (shape level) embeddings: mean over faces / points.
    gout1_1 = out1_1.mean(-1)
    gout1_2 = out1_2.mean(-1)
    gout2_1 = out2_1.mean(-1)
    gout2_2 = out2_2.mean(-1)

    # crossmodal
    crossmodal_loss = contrastive_loss(gout1_1, gout2_1, params) +\
           contrastive_loss(gout1_2, gout2_2, params) +\
           contrastive_loss(gout1_1, gout2_2, params) +\
           contrastive_loss(gout1_2, gout2_1, params)

    # model level; outputs 1_* come from the mesh backbone and 2_* from the
    # point-cloud backbone (the two names used to be swapped in the log).
    mesh_loss = contrastive_loss(gout1_1, gout1_2, params)
    pc_loss = contrastive_loss(gout2_1, gout2_2, params)

    return {
        'loss': (0.25 * crossmodal_loss + pc_loss + mesh_loss) +\
                (0.25 * local_crossmodal_loss + pc_local_loss + mesh_local_loss),
        'pc_loss': pc_loss,
        'pc_local_loss': pc_local_loss,
        'mesh_local_loss': mesh_local_loss,
        'mesh_loss': mesh_loss,
        'local_crossmodal_loss': 0.25 * local_crossmodal_loss,
        # Key previously carried a stray trailing colon ('crossmodal_loss:').
        'crossmodal_loss': 0.25 * crossmodal_loss
    }

In [None]:
import neptune.new as neptune
from workspace.utils.train_loop import *

params = {
    'name': 'Experiment_multimodal_abc',
    'dataset': 'abc',
    # NOTE(review): the DataLoaders in this notebook are built with batch_size=6,
    # so this value only describes the run in the logger — confirm which is intended.
    'batch_size': 8,
    'tau': 0.07,
    'n_output': 512,
    'total_epochs': 100,
    'lr': 5e-5,
    'weight_decay': 1e-5,
    'save_every': 100,
    'weights_root': '../weights/'
}

# tags
tags = ['abc']

# SECURITY: the API token must not live in the notebook. When `api_token` is
# not passed, neptune reads it from the NEPTUNE_API_TOKEN environment variable.
# The token that used to be hard-coded here should be revoked.
logger = neptune.init(project='seals5454/crossmodal-exps-igor',
                      name=params['name'],
                      tags=tags,
                      )

logger['parameters'] = params

In [None]:
# Optimise only the parameters that are not frozen.
optimizer = torch.optim.AdamW(
    (param for param in model.parameters() if param.requires_grad),
    lr=params['lr'],
    weight_decay=params['weight_decay'],
)

# One cosine half-period spanning all optimisation steps of the run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=len(train_loader) * params['total_epochs'],
)

In [None]:
# Launch training. `train_model` comes from the star import of workspace.utils.train_loop.
# NOTE(review): presumably it calls `forward(model, batch, logger, mode)` per batch — confirm there.
train_model(model, params, logger,  train_loader, test_loader, optimizer, scheduler, forward)

## ВИЗУАЛИЗАЦИЯ


In [None]:
from torch.nn import functional as F
from workspace.datasets.transforms import *



def visualize_elements_heatmap_pc(point_cloud, features, anchor_idx=-1):
    '''
    Plot a point cloud coloured by cosine similarity to one anchor point.

    :param point_cloud: point cloud, tensor of size (n_points, 3)
    :param features: point cloud features, tensor of size (n_points, emb_dim)
    :param anchor_idx: index of the anchor point; -1 picks a random one
    :return: k3d plot with the coloured cloud and the anchor as a larger point
    '''
    if anchor_idx == -1:
        anchor_idx = np.random.randint(0, point_cloud.size(0), size=(1,))[0]

    unit_features = F.normalize(features, dim=-1)
    # Dot products of unit vectors = cosine similarity to the anchor.
    similarity = unit_features[anchor_idx] @ unit_features.t()
    anchor_xyz = point_cloud[anchor_idx].unsqueeze(0)

    plot = k3d.plot()
    plot += k3d.points(point_cloud, point_size=0.025, attribute=similarity)
    plot += k3d.points(anchor_xyz, point_size=0.05)

    return plot


def visualize_elements_heatmap_mesh(mesh, features, anchor_idx=-1):
    '''
    Plot a mesh whose faces are coloured by cosine similarity to one anchor face.

    :param mesh: tuple (vertices, faces)
    :param features: faces features, tensor of size (>=n_faces, emb_dim);
                     rows beyond n_faces are dropped
    :param anchor_idx: index of the anchor face; -1 picks a random one
    :return: k3d plot with the coloured mesh and a red point at the anchor face
    '''
    vertices, faces = mesh
    vertices = PointCloudNormalize()(vertices)
    faces_num = faces.shape[0]

    if anchor_idx == -1:
        anchor_idx = np.random.randint(0, faces_num, size=(1,))[0]

    unit_features = F.normalize(features[:faces_num], dim=-1)
    similarity = unit_features[anchor_idx] @ unit_features.t()

    plot = k3d.plot()

    # Mark the anchor at the mean of its three vertices.
    v0, v1, v2 = faces[anchor_idx][0], faces[anchor_idx][1], faces[anchor_idx][2]
    anchor_point = (vertices[v0] + vertices[v1] + vertices[v2]) / 3

    plot += k3d.mesh(vertices, faces, triangles_attribute=similarity[:faces_num])
    plot += k3d.points(anchor_point[None, ...], point_size=0.1, color=0xff0000)

    return plot


def visualize_elements_heatmap_pc_to_mesh(point_cloud, mesh, features_pc, features_mesh, anchor_idx=-1):
    '''
    Pick an anchor point in the cloud and colour both the cloud and the mesh by
    cosine similarity to that point's features. The mesh is drawn shifted by 2
    along x so the two objects sit side by side.

    :param point_cloud: point cloud, tensor of size (n_points, 3)
    :param mesh: tuple (vertices, faces)
    :param features_pc: point cloud features, tensor of size (n_points, emb_dim)
    :param features_mesh: faces features, tensor of size (>=n_faces, emb_dim)
    :param anchor_idx: index of the anchor point; -1 picks a random one
    :return: k3d plot with mesh, cloud and a red anchor point
    '''
    vertices, faces = mesh
    faces_num = faces.shape[0]
    unit_mesh = F.normalize(features_mesh[:faces_num], dim=-1)
    unit_pc = F.normalize(features_pc, dim=-1)

    if anchor_idx == -1:
        anchor_idx = np.random.randint(0, point_cloud.size(0), size=(1,))[0]

    anchor_feature = unit_pc[anchor_idx]
    sims_pc = anchor_feature @ unit_pc.T
    sims_mesh = anchor_feature @ unit_mesh.T

    plot = k3d.plot()
    vertices = PointCloudNormalize()(vertices)
    # NOTE(review): in-place shift — if PointCloudNormalize returns its input
    # array this also moves the caller's vertices; confirm.
    vertices[:, 0] += 2

    plot += k3d.mesh(vertices, faces, triangles_attribute=sims_mesh)
    plot += k3d.points(point_cloud, point_size=0.025, attribute=sims_pc)
    plot += k3d.points(point_cloud[anchor_idx].unsqueeze(0), point_size=0.1, color=0xff0000)

    return plot


def visualize_elements_heatmap_mesh_to_pc(point_cloud, mesh, features_pc, features_mesh, anchor_idx=-1):
    '''
    Pick an anchor face in the mesh and colour both the mesh and the cloud by
    cosine similarity to that face's features. The mesh is drawn shifted by 2
    along x so the two objects sit side by side.

    :param point_cloud: point cloud, tensor of size (n_points, 3)
    :param mesh: tuple (vertices, faces)
    :param features_pc: point cloud features, tensor of size (n_points, emb_dim)
    :param features_mesh: faces features, tensor of size (>=n_faces, emb_dim)
    :param anchor_idx: index of the anchor face; -1 picks a random one
    :return: k3d plot with mesh, cloud and a red point at the anchor face
    '''
    vertices, faces = mesh
    faces_num = faces.shape[0]
    unit_mesh = F.normalize(features_mesh[:faces_num], dim=-1)
    unit_pc = F.normalize(features_pc, dim=-1)

    if anchor_idx == -1:
        anchor_idx = np.random.randint(0, faces_num, size=(1,))[0]

    anchor_feature = unit_mesh[anchor_idx]
    sims_pc = anchor_feature @ unit_pc.T
    sims_mesh = anchor_feature @ unit_mesh.T

    plot = k3d.plot()
    vertices = PointCloudNormalize()(vertices)
    # NOTE(review): in-place shift — if PointCloudNormalize returns its input
    # array this also moves the caller's vertices; confirm.
    vertices[:, 0] += 2

    # Anchor marker at the mean of the (already shifted) face vertices.
    v0, v1, v2 = faces[anchor_idx][0], faces[anchor_idx][1], faces[anchor_idx][2]
    anchor_point = (vertices[v0] + vertices[v1] + vertices[v2]) / 3

    plot += k3d.mesh(vertices, faces, triangles_attribute=sims_mesh)
    plot += k3d.points(point_cloud, point_size=0.025, attribute=sims_pc)
    plot += k3d.points(anchor_point[None, ...], point_size=0.1, color=0xff0000)

    return plot

In [None]:
# Load to CPU first so the checkpoint does not require the GPU it was saved
# from; load_state_dict copies the values onto the model's own devices.
# torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load('exp41', map_location='cpu')

In [None]:
# Restore the trained weights into `model`.
model.load_state_dict(state_dict)

In [None]:
index = 0

# Fetch ONE batch and use it for both the forward pass and the plotted
# geometry. Every `next(iter(train_loader))` builds a new batch with fresh
# random transforms and a fresh random point subsampling, so taking the point
# cloud from a second call made it disagree with the features.
vis_batch = next(iter(train_loader))
with torch.no_grad():
    feats = model(*vis_batch)

# vis_batch[3] is the second point-cloud view, a (points, face_indexes) pair;
# feats[3] are the matching point features and feats[0] the first mesh view's
# face features.
point_cloud = vis_batch[3][0][index].T.cpu()
pc_features = feats[3][index].T.detach().cpu()
m_features = feats[0][index].T.detach().cpu()

In [None]:
# Raw geometry of sample `index`: (vertices, faces), both reshaped to rows of 3.
with h5py.File('abc_train.hdf5', 'r') as h5r:
    mesh = tuple(
        h5r[key][index][:].reshape(-1, 3)
        for key in ('vertices', 'faces')
    )

In [None]:
# Sanity check of the plotted cloud's shape, (n_points, 3) after the transpose above.
point_cloud.shape

In [None]:
# Random anchor point in the cloud; cloud and mesh are coloured by similarity to it.
visualize_elements_heatmap_pc_to_mesh(point_cloud, mesh, pc_features, m_features)

Активации

In [None]:
# One batch through the model without tracking gradients.
batch = next(iter(train_loader))
with torch.no_grad():
    activations = model(*batch)
    # fm* = outputs of head1 (mesh backbone), fp* = outputs of head2 (point-cloud
    # backbone), one per view; the trailing face indexes are not needed here.
    fm1, fm2, fp1, fp2, _ = activations

In [None]:
# Shape-level embeddings: mean over elements, then L2-normalise.
# Derived from `activations` rather than from fm1..fp2 themselves so that
# re-running this cell yields the same result instead of reducing twice.
fm1, fm2, fp1, fp2 = (
    F.normalize(activation.mean(-1).detach().cpu(), dim=-1)
    for activation in activations[:4]
)

In [None]:
# Cosine similarities between the two mesh views (rows are unit vectors).
fm1 @ fm2.T

In [None]:
# Cosine similarities between the two point-cloud views.
fp1 @ fp2.T

In [None]:
# Cross-modal cosine similarities: mesh view 1 against point-cloud view 1.
fm1 @ fp1.T