In [3]:
import aiida
from aiida import orm, load_profile
from aiida.orm import StructureData, ArrayData, TrajectoryData, QueryBuilder
from aiida.orm.utils import load_entity, load_code, load_computer, load_group, load_node
import sys, os
# import freud
import ase
import zipfile
from ase import Atoms
from ase.atoms import Atoms
from ase.calculators import calculator
from ase.calculators.calculator import Calculator, kpts2ndarray
from ase.io import read, write
from ase.calculators.abinit import Abinit
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from matplotlib.colors import Colormap as cmap
import matplotlib.cm
from collections import Counter
from pandas.plotting import scatter_matrix
import seaborn as sns
from math import pi
import numpy as np
from operator import itemgetter
from mpl_toolkits.mplot3d import Axes3D
import random
from functools import partial
import json
from numpy.linalg import pinv as inv
from chemiscope import write_input
# import rowan
from matminer.datasets import load_dataset
from matplotlib.lines import Line2D
from scipy.spatial import Voronoi, voronoi_plot_2d, ConvexHull, convex_hull_plot_2d, Delaunay, SphericalVoronoi
import landlab
from landlab import VoronoiDelaunayGrid, RasterModelGrid
from landlab.grid.voronoi  import simple_poly_area 
from shapely.geometry import LineString
from shapely.ops import polygonize, unary_union

#sklearn
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.linear_model import LinearRegression as LR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, RidgeCV, LogisticRegression, LogisticRegressionCV
from sklearn.metrics.pairwise import linear_kernel, rbf_kernel
from sklearn.metrics import accuracy_score
from sklearn.kernel_ridge import KernelRidge
from sklearn.kernel_approximation import Nystroem
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, validation_curve, learning_curve, GridSearchCV

# Librascal
from rascal.representations import SphericalInvariants as SOAP

#skcosmo
from skcosmo.preprocessing import SparseKernelCenterer as SKC
from skcosmo.feature_selection import FPS
from skcosmo.preprocessing import StandardFlexibleScaler, KernelNormalizer
from skcosmo.decomposition import PCovR
from skcosmo.decomposition import KPCovR
from skcosmo.preprocessing import KernelNormalizer as KFC
# Configure seaborn's plotting theme: "white" style, "bright" palette, and
# seaborn colour codes enabled (color_codes=True).
sns.set(style="white", palette="bright", color_codes=True)

In [8]:
def unit_normal(a, b, c):
    """Return the unit normal of the plane through the points a, b and c.

    Each normal component is the determinant of the 3x3 matrix whose rows
    are the three points, with the column of that component replaced by
    ones. Only the first three coordinates of each point are read.

    Returns a 3-tuple (x, y, z). No guard is applied for collinear points:
    all three determinants are then zero and the division below yields NaNs.
    """
    rows = [[p[0], p[1], p[2]] for p in (a, b, c)]

    components = []
    for axis in range(3):
        # Copy the coordinates and overwrite one column with ones.
        matrix = [list(row) for row in rows]
        for row in matrix:
            row[axis] = 1
        components.append(np.linalg.det(matrix))

    x, y, z = components
    magnitude = (x**2 + y**2 + z**2)**.5
    return (x/magnitude, y/magnitude, z/magnitude)

#area of polygon poly
def poly_area(poly):
    """Return the area of a planar polygon given as 3D vertices.

    Parameters
    ----------
    poly : sequence of 3-component points
        Polygon vertices in order around the boundary. Consecutive vertices
        (and the last/first pair) are treated as edges.

    Returns
    -------
    0 when fewer than three vertices are given, otherwise the area as a
    float.

    Notes
    -----
    The cross products of consecutive vertices are summed; for a closed
    planar polygon that sum is a vector of length ``2 * area`` along the
    plane normal, so its magnitude is used directly. This avoids deriving
    the normal from the first three vertices, which is undefined (NaN) when
    those three happen to be collinear or the polygon has zero area.
    """
    if len(poly) < 3: # not a plane - no area
        return 0
    n_vertices = len(poly)
    total = [0.0, 0.0, 0.0]
    for i in range(n_vertices):
        # Wrap around so the last vertex is paired with the first.
        prod = np.cross(poly[i], poly[(i + 1) % n_vertices])
        for axis in range(3):
            total[axis] += prod[axis]
    # |sum of cross products| == 2 * area for a planar polygon.
    return np.dot(total, total) ** 0.5 / 2

In [3]:
# (first space group, last space group, point-group label, number of operations).
# NOTE(review): the '_' after a digit appears to stand for an overbar
# (e.g. '4_' for -4, 'm3_m' for m-3m) -- confirm with the data source.
_POINT_GROUP_RANGES = (
    (1, 1, '1', 1),
    (2, 2, '1_', 2),
    (3, 5, '2', 2),
    (6, 9, 'm', 2),
    (10, 15, '2/m', 4),
    (16, 24, '222', 4),
    (25, 46, 'mm2', 4),
    (47, 74, 'mmm', 8),
    (75, 80, '4', 4),
    (81, 82, '4_', 4),
    (83, 88, '4/m', 8),
    (89, 98, '422', 8),
    (99, 110, '4mm', 8),
    (111, 122, '4_2m', 8),
    (123, 142, '4/mmm', 16),
    (143, 146, '3', 3),
    (147, 148, '3_', 6),
    (149, 155, '32', 6),
    (156, 161, '3m', 6),
    (162, 167, '3_m', 12),
    (168, 173, '6', 6),
    (174, 174, '6_', 6),
    (175, 176, '6/m', 12),
    (177, 182, '622', 12),
    (183, 186, '6mm', 12),
    (187, 190, '6_m2', 12),
    (191, 194, '6/mmm', 24),
    (195, 199, '23', 12),
    (200, 206, 'm_3', 24),
    (207, 214, '432', 24),
    (215, 220, '4_3m', 24),
    (221, 230, 'm3_m', 48),
)


def point_group(structure):
    """Return the point group and its number of operations for space group ``sg``.

    The space-group number is read from the module-level variable ``sg``,
    which must be set before calling; the ``structure`` argument is accepted
    but not used by this function.

    Side effects: the module-level variables ``pg`` (point-group label) and
    ``o`` (number of operations) are set to the returned values.

    Returns
    -------
    tuple
        ``(pg, o)``.

    Raises
    ------
    ValueError
        If ``sg`` does not fall in any tabulated range (1..230). Without this
        check the values left in ``pg``/``o`` by a previous call would be
        returned silently.
    """
    global pg
    global o
    for first, last, label, order in _POINT_GROUP_RANGES:
        if first <= sg <= last:
            pg = label
            o = order
            return (pg, o)
    raise ValueError(
        'space group number must be between 1 and 230, got {!r}'.format(sg)
    )

def magic_four(lista):
    """Return the positions in ``lista`` whose value is a multiple of 4."""
    positions = []
    for pos in range(len(lista)):
        if lista[pos] % 4 == 0:
            positions.append(pos)
    return positions

def non_magic_four(lista):
    """Return the positions in ``lista`` whose value is not a multiple of 4."""
    positions = []
    for pos in range(len(lista)):
        if lista[pos] % 4 != 0:
            positions.append(pos)
    return positions

In [4]:
"""Counting number of atoms from string, for the pandas Materials Project 
dataset specifically"""   
def natoms(string):
    """Extract the number of sites from a structure's text representation.

    The text is searched for the first ``Sites (N)`` entry and ``N`` is
    returned as an int. Any number of digits is accepted, so counts of 100
    or more are read in full rather than being cut to two digits.

    Parameters
    ----------
    string : str
        Text containing a ``Sites (N)`` entry.

    Returns
    -------
    int or None
        The site count, or None when ``string`` is empty.

    Raises
    ------
    ValueError
        If the text is non-empty but contains no ``Sites (N)`` entry.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    if not string:
        return None
    match = re.search(r'Sites\s*\((\d+)\)', string)
    if match is None:
        raise ValueError("no 'Sites (N)' entry found in structure string")
    return int(match.group(1))

In [5]:
def system(bl):
    """Return the crystal-system name for a two-letter lattice symbol.

    Parameters
    ----------
    bl : str
        Lattice symbol such as ``'cF'``, ``'tI'`` or ``'hR'``. In addition
        to the ``oC``/``oA`` and ``mC`` spellings, the Pearson base-centred
        symbols ``oS`` and ``mS`` are accepted.

    Returns
    -------
    str
        One of 'cubic', 'tetragonal', 'orthorhombic', 'monoclinic',
        'hexagonal', 'trigonal' or 'triclinic'.

    Raises
    ------
    ValueError
        If ``bl`` is not a recognised symbol.

    Notes
    -----
    The result is only returned. It is deliberately not stored in a
    module-level variable named ``sys``: that name is bound to the
    standard-library module by the notebook's ``import sys, os``, and
    overwriting it would break any later ``sys.*`` call.
    """
    crystal_systems = {
        'cP': 'cubic', 'cI': 'cubic', 'cF': 'cubic',
        'tP': 'tetragonal', 'tI': 'tetragonal',
        'oP': 'orthorhombic', 'oI': 'orthorhombic', 'oF': 'orthorhombic',
        'oC': 'orthorhombic', 'oA': 'orthorhombic', 'oS': 'orthorhombic',
        'mP': 'monoclinic', 'mC': 'monoclinic', 'mS': 'monoclinic',
        'hP': 'hexagonal',
        'hR': 'trigonal',
        'aP': 'triclinic',
    }
    try:
        return crystal_systems[bl]
    except KeyError:
        raise ValueError('unknown lattice symbol: {!r}'.format(bl))

In [10]:
# Point-group labels, spelled the same way point_group() returns them.
# The order of the entries is kept exactly as listed here.
pg_list = (
    "mmm 222 2/m 4_3m 4/mmm 1_ 3_m 6/mmm 4_2m "
    "m3_m 3_ 23 m 4_ 4mm 6mm 4/m m_3 422 "
    "mm2 6 2 3m 1 6_m2 32 4 622 6/m 432 "
    "6_ 3"
).split()
len(pg_list)


32

In [1]:
# Proper subgroups credited for each point-group label (labels as produced
# by point_group(); '1' has no proper subgroup and therefore no entry).
_PROPER_SUBGROUPS = {
    'm': ('1',),
    '2': ('1',),
    '3': ('1',),
    '1_': ('1',),
    '222': ('1', '2'),
    '4': ('1', '2'),
    '4_': ('1', '2'),
    'mm2': ('1', '2', 'm'),
    '2/m': ('1', '2', 'm', '1_'),
    # -42m contains the rotoinversion -4 ('4_'), not the pure rotation 4.
    '4_2m': ('4_', 'mm2', '222', '1', '2', 'm'),
    '4mm': ('4', 'mm2', '1', '2', 'm'),
    '422': ('4', '222', '1', '2'),
    '4/m': ('4', '2/m', '4_', '1', '2', 'm', '1_'),
    'mmm': ('222', '2/m', 'mm2', '1', '2', 'm', '1_'),
    '4/mmm': ('4_2m', '4mm', '422', '4/m', 'mmm', '222', '4', '4_',
              '2/m', 'mm2', '1', '2', 'm', '1_'),
    '3m': ('1', '3', 'm'),
    '6_': ('1', '3', 'm'),
    '32': ('1', '3', '2'),
    '6': ('1', '3', '2'),
    '3_': ('1', '3', '1_'),
    '3_m': ('3_', '32', '3m', '2/m', '1_', '1', '3', 'm', '2'),
    '6/m': ('3_', '6_', '6', '2/m', '1_', '1', '3', 'm', '2'),
    '622': ('222', '6', '32', '1', '3', '2'),
    '6_m2': ('32', '6_', '3m', 'mm2', '1', '3', 'm', '2'),
    # 6mm has no inversion centre: its subgroups are 6, 3m, mm2, 3, 2, m, 1.
    '6mm': ('6', '3m', 'mm2', '1', '3', 'm', '2'),
    '6/mmm': ('6mm', '6_m2', '3_m', '622', '6/m', 'mmm', '222', '2/m',
              'mm2', '1', '2', 'm', '1_', '3', '3m', '32', '3_', '6_', '6'),
    '23': ('3', '222', '2', '1'),
    # -43m contains -4 ('4_') but no fourfold rotation.
    '4_3m': ('4_2m', '23', '3m', '4_', 'mm2', '222', '1', '2', 'm', '3'),
    '432': ('422', '23', '32', '4', '222', '1', '2', '3'),
    'm_3': ('mmm', '23', '3_', '2/m', 'mm2', 'm', '222', '1', '2', '3', '1_'),
    'm3_m': ('4_3m', '432', 'm_3', '3_m', '4/mmm', '23', '4_2m', '4mm',
             '422', '4/m', 'mmm', '222', '4', '4_', '2/m', 'mm2', '1', '2',
             'm', '1_', '3_', '32', '3m', '3'),
}


def inh_symm(pandas):
    """Count point-group occurrences including inherited (subgroup) symmetry.

    Starts from ``pandas.value_counts()`` and, for every entry, additionally
    credits one count to each proper subgroup of that entry's point group,
    so a structure with point group ``mm2`` also counts towards ``1``, ``2``
    and ``m``.

    Parameters
    ----------
    pandas : pandas.Series
        Point-group labels, one per structure. Labels without an entry in
        the subgroup table contribute only their own direct count.

    Returns
    -------
    pandas.Series
        Counts indexed by point-group label. Labels that occur in the input
        keep their ``value_counts`` order; a subgroup label that never
        occurs directly is appended, starting from zero.

    Side effects: the result is also stored in the module-level variable
    ``counts``.
    """
    global counts
    direct = pandas.value_counts()
    counts = direct.copy()
    # Work per distinct label: every one of the n rows with this label adds
    # one count to each proper subgroup.
    for group, n in direct.items():
        for sub in _PROPER_SUBGROUPS.get(group, ()):
            # .get() lets a subgroup absent from the data start at 0
            # instead of raising KeyError.
            counts[sub] = counts.get(sub, 0) + n
    return counts