Skip to content

Commit

Permalink
[compiler] implement composer classmap autoloading
Browse files Browse the repository at this point in the history
`autoload.classmap` is another popular class autoloading
method in composer.

Implementing this feature increases the number of composer packages
that can be used by KPHP code.

Classmap works like this:
for a given files list (dirs or regular files),
collect all files with `.inc` and `.php` extension recursively and
build autoloading maps for them.

Unlike PSR4, classmap doesn't require any conventions.
A file can have multiple classes, its name could be anything,
namespaces can be arbitrary as well.

It's hard to implement this feature in KPHP without actually parsing the php files.
As a compromise, we scan the classmap folders and
require every file found there when the composer autoload file is included.

Fixes #49
  • Loading branch information
quasilyte committed Jul 12, 2022
1 parent 8438196 commit e9c55ae
Show file tree
Hide file tree
Showing 15 changed files with 465 additions and 240 deletions.
96 changes: 1 addition & 95 deletions compiler/compiler-core.cpp
Expand Up @@ -4,11 +4,8 @@

#include "compiler/compiler-core.h"

#include <dirent.h>

#include "common/algorithms/contains.h"
#include "common/wrappers/mkdir_recursive.h"
#include "common/smart_ptrs/unique_ptr_with_delete_function.h"

#include "compiler/const-manipulations.h"
#include "compiler/data/ffi-data.h"
Expand All @@ -18,80 +15,6 @@
#include "compiler/data/src-file.h"
#include "compiler/name-gen.h"

namespace {

// Deleter for DIR handles obtained from opendir(); the closedir() result is ignored.
void close_dir(DIR *d) {
  static_cast<void>(closedir(d));
}

// Walks the directory tree rooted at `path` and appends to `result` every
// directory that directly contains a composer.json file; such a directory
// is not descended into any further.
// Entries whose names start with '.' are skipped.
// If `path` can't be opened, kphp_warning is issued and `result` is left untouched.
void collect_composer_folders(const std::string &path, std::vector<std::string> &result) {
  // can't use nftw here as it doesn't provide a portable way to stop directory traversal;
  // we don't want to visit *all* files in the vendor tree
  //
  // suppose we have this composer-generated layout:
  //     vendor/pkg1/
  //     * composer.json
  //     * src/
  //     vendor/ns/pkg2/
  //     * composer.json
  //     * src/
  // all pkg directories can have a lot of files inside src/,
  // if we can stop as soon as we find composer.json, a lot of
  // redundant work is avoided

  vk::unique_ptr_with_delete_function<DIR, close_dir> dp{opendir(path.c_str())};
  if (dp == nullptr) {
    kphp_warning(fmt_format("find composer files: opendir({}) failed: {}", path.c_str(), strerror(errno)));
    return;
  }

  // tells whether entry_path refers to a directory (symlinks are followed);
  // a failed stat() (e.g. a dangling symlink) is treated as "not a directory"
  // instead of inspecting an uninitialized struct stat
  const auto is_directory = [](const std::string &entry_path) {
    struct stat entry_info;
    return stat(entry_path.c_str(), &entry_info) == 0 && S_ISDIR(entry_info.st_mode);
  };

  // dirs contains all directories that we need to visit when descending
  std::vector<std::string> dirs;

  while (const auto *entry = readdir(dp.get())) {
    if (entry->d_name[0] == '.') {
      continue;
    }

    // since composer package can't have nested composer.json file, we stop
    // directory traversal as soon as we find it
    if (std::strcmp(entry->d_name, "composer.json") == 0) {
      result.push_back(path);
      return;
    }

    if (entry->d_type == DT_DIR) {
      dirs.emplace_back(path + "/" + entry->d_name);
    } else if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {
      // by default, composer does not copy packages; it creates a symlink instead,
      // so collect only those links that point to a directory;
      // DT_UNKNOWN is returned by filesystems that don't report the entry type,
      // in that case stat() is the only way to find out what the entry is
      auto entry_path = path + "/" + entry->d_name;
      if (is_directory(entry_path)) {
        dirs.push_back(std::move(entry_path));
      }
    }
  }

  // no composer.json at this level: descend further
  for (const auto &dir : dirs) {
    collect_composer_folders(dir, result);
  }
}

// Returns every directory under `dir` that collect_composer_folders
// reports as a composer.json root.
std::vector<std::string> find_composer_folders(const std::string &dir) {
  std::vector<std::string> composer_roots;
  collect_composer_folders(dir, composer_roots);
  return composer_roots;
}

}

// Sentinel FunctionPtr built from the fake address 0x0001 rather than from a real FunctionData object.
// NOTE(review): judging by the name, it presumably marks functions that are required
// but not parsed yet — confirm against its uses.
static FunctionPtr UNPARSED_BUT_REQUIRED_FUNC_PTR = FunctionPtr(reinterpret_cast<FunctionData *>(0x0001));

CompilerCore::CompilerCore() :
Expand Down Expand Up @@ -532,24 +455,7 @@ void CompilerCore::init_composer_class_loader() {
}

composer_class_loader.set_use_dev(settings().composer_autoload_dev.get());

composer_class_loader.load_root_file(settings().composer_root.get());

// We could traverse the composer file and collect all "repositories"
// and map them with "requirements" to get the dependency list,
// but some projects may use composer plugins that change composer
// files before "composer install" is invoked, so the final vendor
// folder may be generated from files that differ from the composer
// files that we can reach. To avoid that problem, we scan the vendor
// folder in order to collect all dependencies (both direct and indirect).

std::string vendor = settings().composer_root.get() + "vendor";
bool vendor_folder_exists = access(vendor.c_str(), F_OK) == 0;
if (vendor_folder_exists) {
for (const auto &composer_root : find_composer_folders(vendor)) {
composer_class_loader.load_file(composer_root);
}
}
composer_class_loader.load(settings().composer_root.get());
}


Expand Down
160 changes: 154 additions & 6 deletions compiler/composer.cpp
Expand Up @@ -6,11 +6,128 @@

#include <yaml-cpp/yaml.h> // using YAML parser to handle JSON files

#include <dirent.h>
#include <sys/stat.h> // TODO: remove when std::filesystem is used everywhere instead of stat
#include <filesystem>
#include <system_error>

#include "common/smart_ptrs/unique_ptr_with_delete_function.h"
#include "common/algorithms/contains.h"
#include "common/wrappers/fmt_format.h"
#include "compiler/kphp_assert.h"
#include "compiler/stage.h"

namespace {

// Deleter for DIR handles obtained from opendir(); the closedir() result is ignored.
void close_dir(DIR *d) {
  static_cast<void>(closedir(d));
}

// Walks the directory tree rooted at `path` and appends to `result` every
// directory that directly contains a composer.json file; such a directory
// is not descended into any further.
// Every appended path ends with '/', so it can be concatenated with a file name as is.
// Entries whose names start with '.' are skipped.
// If `path` can't be opened, kphp_warning is issued and `result` is left untouched.
void collect_composer_folders(const std::string &path, std::vector<std::string> &result) {
  // can't use nftw here as it doesn't provide a portable way to stop directory traversal;
  // we don't want to visit *all* files in the vendor tree
  //
  // suppose we have this composer-generated layout:
  //     vendor/pkg1/
  //     * composer.json
  //     * src/
  //     vendor/ns/pkg2/
  //     * composer.json
  //     * src/
  // all pkg directories can have a lot of files inside src/,
  // if we can stop as soon as we find composer.json, a lot of
  // redundant work is avoided
  //
  // TODO: rewrite using C++17 filesystem?

  vk::unique_ptr_with_delete_function<DIR, close_dir> dp{opendir(path.c_str())};
  if (dp == nullptr) {
    kphp_warning(fmt_format("find composer files: opendir({}) failed: {}", path.c_str(), strerror(errno)));
    return;
  }

  // `path` may come with or without a trailing '/' (nested calls pass it, the initial
  // call may not); normalize it once so that joined paths never contain "//" and
  // every collected root ends with '/'
  const std::string dir_prefix = path.back() == '/' ? path : path + "/";

  // tells whether entry_path refers to a directory (symlinks are followed);
  // a failed stat() (e.g. a dangling symlink) is treated as "not a directory"
  // instead of inspecting an uninitialized struct stat
  const auto is_directory = [](const std::string &entry_path) {
    struct stat entry_info;
    return stat(entry_path.c_str(), &entry_info) == 0 && S_ISDIR(entry_info.st_mode);
  };

  // dirs contains all directories that we need to visit when descending
  std::vector<std::string> dirs;

  while (const auto *entry = readdir(dp.get())) {
    if (entry->d_name[0] == '.') {
      continue;
    }

    // since composer package can't have nested composer.json file, we stop
    // directory traversal as soon as we find it
    if (std::strcmp(entry->d_name, "composer.json") == 0) {
      result.push_back(dir_prefix);
      return;
    }

    if (entry->d_type == DT_DIR) {
      dirs.emplace_back(dir_prefix + entry->d_name + "/");
    } else if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {
      // by default, composer does not copy packages; it creates a symlink instead,
      // so collect only those links that point to a directory;
      // DT_UNKNOWN is returned by filesystems that don't report the entry type,
      // in that case stat() is the only way to find out what the entry is
      auto entry_path = dir_prefix + entry->d_name + "/";
      if (is_directory(entry_path)) {
        dirs.push_back(std::move(entry_path));
      }
    }
  }

  // no composer.json at this level: descend further
  for (const auto &dir : dirs) {
    collect_composer_folders(dir, result);
  }
}

// Returns every directory under `dir` that collect_composer_folders
// reports as a composer.json root.
std::vector<std::string> find_composer_folders(const std::string &dir) {
  std::vector<std::string> composer_roots;
  collect_composer_folders(dir, composer_roots);
  return composer_roots;
}

} // namespace

// Tells whether `filename` was registered by scan_classmap, i.e. whether
// it is stored in classmap_files_.
bool ComposerAutoloader::is_classmap_file(const std::string &filename) const noexcept {
  const bool registered = vk::contains(classmap_files_, filename);
  return registered;
}

void ComposerAutoloader::scan_classmap(const std::string &filename) {
// supporting the real composer classmap is cumbersome: it requires full PHP parsing to
// fetch all classes from files (the filename doesn't have to follow any conventions);
// we could also invoke php interpreter over vendor/composer/autoload_classmap.php to
// print a JSON dump of the generated classmap and then decode that, but then
// it will be impossible to compile a kphp program that uses a classmap without php interpreter;
// as an alternative, we add all classmap files to auto-required lists that will be
// included along "autoload.files" files, if some classes are not needed, they will be
// discarded after we compute actually used symbols
//
// this approach works well as long as there is no significant side effects related to
// the files being autoloaded (otherwise those side effects will trigger at different point in time)

const auto add_classmap_file = [&](const std::string &filename) {
classmap_files_.insert(filename);
files_to_require_.emplace_back(filename);
};

auto file_info = std::filesystem::status(filename);
kphp_error(file_info.type() != std::filesystem::file_type::not_found,
fmt_format("can't find {} classmap file", filename));
if (file_info.type() == std::filesystem::file_type::directory) {
for (const auto &entry : std::filesystem::directory_iterator(filename)) {
scan_classmap(entry.path().string());
}
} else if (file_info.type() == std::filesystem::file_type::regular) {
if (vk::string_view(filename).ends_with(".php") || vk::string_view(filename).ends_with(".inc")) {
add_classmap_file(filename);
}
}
}

std::string ComposerAutoloader::psr4_lookup_nocache(const std::string &class_name) const {
std::string prefix = class_name;

Expand Down Expand Up @@ -84,6 +201,26 @@ void ComposerAutoloader::set_use_dev(bool v) {
use_dev_ = v;
}

// Loads the root composer file located in `pkg_root` and then the composer file
// of every package folder found inside "<pkg_root>vendor" (when that folder exists).
void ComposerAutoloader::load(const std::string &pkg_root) {
  load_root_file(pkg_root);

  // We could traverse the composer file and collect all "repositories"
  // and map them with "requirements" to get the dependency list,
  // but some projects may use composer plugins that change composer
  // files before "composer install" is invoked, so the final vendor
  // folder may be generated from files that differ from the composer
  // files that we can reach. To avoid that problem, we scan the vendor
  // folder in order to collect all dependencies (both direct and indirect).

  const std::string vendor_dir = pkg_root + "vendor";
  if (access(vendor_dir.c_str(), F_OK) != 0) {
    // no vendor folder, so there are no dependencies to load
    return;
  }

  const auto composer_roots = find_composer_folders(vendor_dir);
  for (const auto &composer_root : composer_roots) {
    load_file(composer_root);
  }
}

void ComposerAutoloader::load_root_file(const std::string &pkg_root) {
kphp_assert(!pkg_root.empty() && pkg_root.back() == '/');
kphp_assert(autoload_filename_.empty());
Expand Down Expand Up @@ -119,6 +256,11 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
// "": "fallback-dir/",
// <...>
// },
// "classmap": [
// "src/",
// "lib/file.php",
// <...>
// ],
// "files": [
// "file.php",
// <...>
Expand All @@ -136,9 +278,9 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
// }
// }

auto filename = pkg_root + "/composer.json";
auto filename = pkg_root + "composer.json";

auto add_autoload_dir = [&](const std::string &prefix, std::string dir) {
auto add_autoload_psr4_dir = [&](const std::string &prefix, std::string dir) {
if (dir.empty()) {
dir = "./"; // composer interprets "" as "./" or "."
}
Expand All @@ -148,7 +290,7 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
dir.push_back('/');
}

autoload_psr4_[prefix].emplace_back(pkg_root + "/" + dir);
autoload_psr4_[prefix].emplace_back(pkg_root + dir);
};

auto add_autoload_section = [&](YAML::Node autoload, bool require_files) {
Expand All @@ -160,20 +302,26 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil

if (kv.second.IsSequence()) {
for (const auto &dir : kv.second) {
add_autoload_dir(prefix, dir.as<std::string>());
add_autoload_psr4_dir(prefix, dir.as<std::string>());
}
} else if (kv.second.IsScalar()) {
add_autoload_dir(prefix, kv.second.as<std::string>());
add_autoload_psr4_dir(prefix, kv.second.as<std::string>());
} else {
kphp_error(false, fmt_format("load composer file {}: invalid autoload psr-4 item", filename.c_str()));
}
}

// https://getcomposer.org/doc/04-schema.md#classmap
const auto classmap_src = autoload["classmap"];
for (const auto &elem : classmap_src) {
scan_classmap(pkg_root + elem.as<std::string>());
}

if (require_files) {
// files that are required by the composer-generated autoload.php
// https://getcomposer.org/doc/04-schema.md#files
for (const auto &autoload_filename : autoload["files"]) {
files_to_require_.emplace_back(pkg_root + "/" + autoload_filename.as<std::string>());
files_to_require_.emplace_back(pkg_root + autoload_filename.as<std::string>());
}
}
};
Expand Down
24 changes: 15 additions & 9 deletions compiler/composer.h
Expand Up @@ -22,15 +22,7 @@ class ComposerAutoloader : private vk::not_copyable {
// inverse of the `composer --no-dev` option
void set_use_dev(bool v);

// load_file parses the "$pkg_root/composer.json" file and saves
// all relevant definitions inside loader;
// this method is not thread-safe and should be called only during the compiler init
void load_file(const std::string &pkg_root);

// load_root_file is like load_file, but should be only used
// for the main composer file;
// this method is not thread-safe and should be called only during the compiler init
void load_root_file(const std::string &pkg_root);
void load(const std::string&);

// psr4_lookup tries to find a filename that should contain
// a class class_name; the lookup is based on the loaded
Expand All @@ -43,17 +35,31 @@ class ComposerAutoloader : private vk::not_copyable {
return filename == autoload_filename_;
}

bool is_classmap_file(const std::string &filename) const noexcept;

const std::vector<std::string> &get_files_to_require() const noexcept {
return files_to_require_;
}

private:
// load_file parses the "$pkg_root/composer.json" file and saves
// all relevant definitions inside loader;
// this method is not thread-safe and should be called only during the compiler init
void load_file(const std::string &pkg_root);

// load_root_file is like load_file, but should be only used
// for the main composer file;
// this method is not thread-safe and should be called only during the compiler init
void load_root_file(const std::string &pkg_root);

void load_file(const std::string &filename, bool root);
std::string psr4_lookup_nocache(const std::string &class_name) const;
void scan_classmap(const std::string &filename);

bool use_dev_;
std::map<std::string, std::vector<std::string>, std::less<>> autoload_psr4_;
std::unordered_set<std::string> deps_;
std::unordered_set<std::string> classmap_files_;

std::string autoload_filename_;
std::vector<std::string> files_to_require_;
Expand Down
3 changes: 2 additions & 1 deletion compiler/data/class-data.cpp
Expand Up @@ -41,7 +41,8 @@ void ClassData::set_name_and_src_name(const std::string &full_name) {
std::string namespace_name = pos == std::string::npos ? "" : full_name.substr(0, pos);
std::string class_name = pos == std::string::npos ? full_name : full_name.substr(pos + 1);

this->can_be_php_autoloaded = file_id && namespace_name == file_id->namespace_name && class_name == file_id->short_file_name;
this->can_be_php_autoloaded = file_id && ((namespace_name == file_id->namespace_name && class_name == file_id->short_file_name) ||
(G->get_composer_autoloader().is_classmap_file(file_id->file_name)));
this->can_be_php_autoloaded |= this->is_builtin();

this->is_lambda = vk::string_view{full_name}.starts_with("Lambda$") || vk::string_view{full_name}.starts_with("ITyped$");
Expand Down

0 comments on commit e9c55ae

Please sign in to comment.