Skip to content

Commit

Permalink
Merge pull request #3 from JosPolfliet/master
Browse files Browse the repository at this point in the history
  • Loading branch information
conradoqg committed Jan 1, 2018
2 parents 9f27cfd + eb87e0f commit 64aed7d
Show file tree
Hide file tree
Showing 8 changed files with 792 additions and 538 deletions.
106 changes: 92 additions & 14 deletions pandas_profiling/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,49 @@
# -*- coding: utf-8 -*-
"""Main module of pandas-profiling.
Docstring is compliant with NumPy/SciPy documentation standard:
https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt
See also for a short description of docstring:
https://stackoverflow.com/questions/3898572/what-is-the-standard-python-docstring-format
"""
import codecs
from .templates import template
from .base import describe, to_html
import pandas_profiling.templates as templates
from .describe import describe
from .report import to_html

# Sentinel for the ``outputfile`` argument of ``ProfileReport.to_file``:
# ``to_file`` only enters its file-handling branch when ``outputfile``
# differs from this value.
NO_OUTPUTFILE = "pandas_profiling.no_outputfile"
# Sentinel default for the ``outputfile`` argument of ``ProfileReport.to_file``:
# when it is received, ``to_file`` substitutes a generated name of the form
# ``profile_<hash of the report>.html``.
DEFAULT_OUTPUTFILE = "pandas_profiling.default_outputfile"


class ProfileReport(object):
"""Generate a profile report from a Dataset stored as a pandas `DataFrame`.
Used as is, it will output its content as an HTML report in a Jupyter notebook.
Attributes
----------
df : DataFrame
Data to be analyzed
bins : int
Number of bins in histogram
check_correlation : boolean
Whether or not to check correlation.
Since it's an expensive computation it can be deactivated for big datasets.
correlation_overrides : list
Variable names not to be rejected because they are correlated
pool_size : int
Number of workers in thread pool
Methods
-------
get_description
Return the description (a raw statistical summary) of the dataset.
"""
html = ''
file = None

def __init__(self, df, **kwargs):

"""Constructor see class documentation
"""
sample = kwargs.get('sample', df.head())

description_set = describe(df, **kwargs)
Expand All @@ -22,22 +54,49 @@ def __init__(self, df, **kwargs):
self.description_set = description_set

def get_description(self):
    """Give access to the raw statistical summary computed for the dataset.

    Returns
    -------
    dict
        The stored description set, containing the following keys:
            * table: general statistics on the dataset
            * variables: summary statistics for each variable
            * freq: frequency table
    """
    summary = self.description_set
    return summary

def get_rejected_variables(self, threshold=0.9):
    """Return the names of the variables rejected for high correlation.

    A variable is reported when its ``correlation`` entry in the variable
    summary is strictly greater than ``threshold``.

    Parameters
    ----------
    threshold : float, optional
        Correlation value above which a variable is rejected (default 0.9).

    Returns
    -------
    list
        The rejected variable names, or an empty list when the variable
        summary exposes no ``correlation`` attribute (i.e. the correlation
        has not been computed).
    """
    variable_profile = self.description_set['variables']
    # Without a correlation entry there is nothing to compare against the
    # threshold, so report no rejections instead of raising AttributeError.
    if not hasattr(variable_profile, 'correlation'):
        return []
    above_threshold = variable_profile.correlation > threshold
    return variable_profile.index[above_threshold].tolist()

def to_file(self, outputfile=DEFAULT_OUTPUTFILE):

"""Write the report to a file.
By default a name is generated.
Parameters:
----------
outputfile : str
The name or the path of the file to generate, including the extension (.html).
"""

if outputfile != NO_OUTPUTFILE:
if outputfile == DEFAULT_OUTPUTFILE:
outputfile = 'profile_' + str(hash(self)) + ".html"
Expand All @@ -46,13 +105,32 @@ def to_file(self, outputfile=DEFAULT_OUTPUTFILE):
self.file.write(templates.template('wrapper').render(content=self.html))

def to_html(self):
    """Render the report inside the ``wrapper`` template and return it.

    Intended for use with frameworks that need the complete page as a
    single string.

    Returns
    -------
    str
        The HTML output: the ``wrapper`` template rendered with ``self.html``
        passed as its ``content``.
    """
    wrapper = templates.template('wrapper')
    return wrapper.render(content=self.html)

def _repr_html_(self):
    """Provide the HTML representation displayed by a Jupyter notebook.

    Returns
    -------
    str
        The HTML held in ``self.html``, returned as stored.
    """
    rendered = self.html
    return rendered

def __str__(self):
    """Return a short, human-readable status line for the report.

    Returns
    -------
    str
        ``"Output written to file <name>"`` when a file object is attached
        to the report as ``self.file``; otherwise a placeholder stating
        that no file has been written.
    """
    target = getattr(self, "file", None)
    if target is None:
        # No file object is attached; reading ``.name`` from None would
        # raise AttributeError and make ``str(report)`` unusable.
        return "Output not written to any file"
    return "Output written to file " + str(target.name)
Loading

0 comments on commit 64aed7d

Please sign in to comment.